This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 5993dffb71 Migrate `parquet-variant` to Rust 2024 (#8510)
5993dffb71 is described below
commit 5993dffb714e47a720f810cc80b04176b591930a
Author: Matthijs Brobbel <[email protected]>
AuthorDate: Tue Oct 7 13:38:13 2025 +0200
Migrate `parquet-variant` to Rust 2024 (#8510)
# Which issue does this PR close?
- Contribute to #6827
# Rationale for this change
Splitting up #8227.
# What changes are included in this PR?
Migrate `parquet-variant` to Rust 2024
# Are these changes tested?
CI
# Are there any user-facing changes?
Yes
---
parquet-variant/Cargo.toml | 2 +-
parquet-variant/benches/variant_builder.rs | 4 +-
parquet-variant/src/builder.rs | 12 ++--
parquet-variant/src/decoder.rs | 4 +-
parquet-variant/src/variant.rs | 6 +-
parquet-variant/src/variant/decimal.rs | 90 +++++++++++++++++------------
parquet-variant/src/variant/list.rs | 2 +-
parquet-variant/src/variant/metadata.rs | 13 ++---
parquet-variant/src/variant/object.rs | 2 +-
parquet-variant/tests/variant_interop.rs | 93 +++++++++++++++++++++++++-----
10 files changed, 156 insertions(+), 72 deletions(-)
diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml
index f1282e8cda..b1985e5f35 100644
--- a/parquet-variant/Cargo.toml
+++ b/parquet-variant/Cargo.toml
@@ -27,7 +27,7 @@ repository = { workspace = true }
authors = { workspace = true }
keywords = ["arrow", "parquet", "variant"]
readme = "README.md"
-edition = { workspace = true }
+edition = "2024"
rust-version = { workspace = true }
[dependencies]
diff --git a/parquet-variant/benches/variant_builder.rs
b/parquet-variant/benches/variant_builder.rs
index 5d00cc054e..420fa583ee 100644
--- a/parquet-variant/benches/variant_builder.rs
+++ b/parquet-variant/benches/variant_builder.rs
@@ -21,9 +21,9 @@ use criterion::*;
use parquet_variant::{Variant, VariantBuilder};
use rand::{
- distr::{uniform::SampleUniform, Alphanumeric},
- rngs::StdRng,
Rng, SeedableRng,
+ distr::{Alphanumeric, uniform::SampleUniform},
+ rngs::StdRng,
};
use std::{hint, ops::Range};
diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index 17455fc4bf..d44e9b10a3 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
use crate::{
- ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8,
VariantList,
+ ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16,
VariantList,
VariantMetadata, VariantObject,
};
use arrow_schema::ArrowError;
@@ -3403,10 +3403,12 @@ mod tests {
// This should fail because "unknown_field" is not in the metadata
let result = obj.try_insert("unknown_field", "value");
assert!(result.is_err());
- assert!(result
- .unwrap_err()
- .to_string()
- .contains("Field name 'unknown_field' not found"));
+ assert!(
+ result
+ .unwrap_err()
+ .to_string()
+ .contains("Field name 'unknown_field' not found")
+ );
}
}
diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs
index 26b4e204fa..8cf3cec112 100644
--- a/parquet-variant/src/decoder.rs
+++ b/parquet-variant/src/decoder.rs
@@ -14,10 +14,10 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+use crate::ShortString;
use crate::utils::{
array_from_slice, overflow_error, slice_from_slice_at_offset,
string_from_slice,
};
-use crate::ShortString;
use arrow_schema::ArrowError;
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, Utc};
@@ -143,7 +143,7 @@ impl OffsetSizeBytes {
_ => {
return Err(ArrowError::InvalidArgumentError(
"offset_size_minus_one must be 0–3".to_string(),
- ))
+ ));
}
};
Ok(result)
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 849947675b..5663ab155c 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
+pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16};
pub use self::list::VariantList;
-pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA,
EMPTY_VARIANT_METADATA_BYTES};
+pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES,
VariantMetadata};
pub use self::object::VariantObject;
// Publically export types used in the API
@@ -25,7 +25,7 @@ pub use half::f16;
pub use uuid::Uuid;
use crate::decoder::{
- self, get_basic_type, get_primitive_type, VariantBasicType,
VariantPrimitiveType,
+ self, VariantBasicType, VariantPrimitiveType, get_basic_type,
get_primitive_type,
};
use crate::path::{VariantPath, VariantPathElement};
use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice};
diff --git a/parquet-variant/src/variant/decimal.rs
b/parquet-variant/src/variant/decimal.rs
index 4793d88569..b0b7d36ed1 100644
--- a/parquet-variant/src/variant/decimal.rs
+++ b/parquet-variant/src/variant/decimal.rs
@@ -285,20 +285,24 @@ mod tests {
decimal4_too_large.is_err(),
"Decimal4 precision overflow should fail"
);
- assert!(decimal4_too_large
- .unwrap_err()
- .to_string()
- .contains("wider than max precision"));
+ assert!(
+ decimal4_too_large
+ .unwrap_err()
+ .to_string()
+ .contains("wider than max precision")
+ );
let decimal4_too_small = VariantDecimal4::try_new(-1_000_000_000_i32,
2);
assert!(
decimal4_too_small.is_err(),
"Decimal4 precision underflow should fail"
);
- assert!(decimal4_too_small
- .unwrap_err()
- .to_string()
- .contains("wider than max precision"));
+ assert!(
+ decimal4_too_small
+ .unwrap_err()
+ .to_string()
+ .contains("wider than max precision")
+ );
// Test valid edge cases for Decimal4
let decimal4_max_valid = VariantDecimal4::try_new(999_999_999_i32, 2);
@@ -319,20 +323,24 @@ mod tests {
decimal8_too_large.is_err(),
"Decimal8 precision overflow should fail"
);
- assert!(decimal8_too_large
- .unwrap_err()
- .to_string()
- .contains("wider than max precision"));
+ assert!(
+ decimal8_too_large
+ .unwrap_err()
+ .to_string()
+ .contains("wider than max precision")
+ );
let decimal8_too_small =
VariantDecimal8::try_new(-1_000_000_000_000_000_000_i64, 2);
assert!(
decimal8_too_small.is_err(),
"Decimal8 precision underflow should fail"
);
- assert!(decimal8_too_small
- .unwrap_err()
- .to_string()
- .contains("wider than max precision"));
+ assert!(
+ decimal8_too_small
+ .unwrap_err()
+ .to_string()
+ .contains("wider than max precision")
+ );
// Test valid edge cases for Decimal8
let decimal8_max_valid =
VariantDecimal8::try_new(999_999_999_999_999_999_i64, 2);
@@ -354,10 +362,12 @@ mod tests {
decimal16_too_large.is_err(),
"Decimal16 precision overflow should fail"
);
- assert!(decimal16_too_large
- .unwrap_err()
- .to_string()
- .contains("wider than max precision"));
+ assert!(
+ decimal16_too_large
+ .unwrap_err()
+ .to_string()
+ .contains("wider than max precision")
+ );
let decimal16_too_small =
VariantDecimal16::try_new(-100000000000000000000000000000000000000_i128, 2);
@@ -365,10 +375,12 @@ mod tests {
decimal16_too_small.is_err(),
"Decimal16 precision underflow should fail"
);
- assert!(decimal16_too_small
- .unwrap_err()
- .to_string()
- .contains("wider than max precision"));
+ assert!(
+ decimal16_too_small
+ .unwrap_err()
+ .to_string()
+ .contains("wider than max precision")
+ );
// Test valid edge cases for Decimal16
let decimal16_max_valid =
@@ -394,10 +406,12 @@ mod tests {
decimal4_invalid_scale.is_err(),
"Decimal4 with scale > 9 should fail"
);
- assert!(decimal4_invalid_scale
- .unwrap_err()
- .to_string()
- .contains("larger than max precision"));
+ assert!(
+ decimal4_invalid_scale
+ .unwrap_err()
+ .to_string()
+ .contains("larger than max precision")
+ );
let decimal4_invalid_scale_large = VariantDecimal4::try_new(123_i32,
20);
assert!(
@@ -418,10 +432,12 @@ mod tests {
decimal8_invalid_scale.is_err(),
"Decimal8 with scale > 18 should fail"
);
- assert!(decimal8_invalid_scale
- .unwrap_err()
- .to_string()
- .contains("larger than max precision"));
+ assert!(
+ decimal8_invalid_scale
+ .unwrap_err()
+ .to_string()
+ .contains("larger than max precision")
+ );
let decimal8_invalid_scale_large = VariantDecimal8::try_new(123_i64,
25);
assert!(
@@ -442,10 +458,12 @@ mod tests {
decimal16_invalid_scale.is_err(),
"Decimal16 with scale > 38 should fail"
);
- assert!(decimal16_invalid_scale
- .unwrap_err()
- .to_string()
- .contains("larger than max precision"));
+ assert!(
+ decimal16_invalid_scale
+ .unwrap_err()
+ .to_string()
+ .contains("larger than max precision")
+ );
let decimal16_invalid_scale_large =
VariantDecimal16::try_new(123_i128, 50);
assert!(
diff --git a/parquet-variant/src/variant/list.rs
b/parquet-variant/src/variant/list.rs
index d14d3a7796..a150295717 100644
--- a/parquet-variant/src/variant/list.rs
+++ b/parquet-variant/src/variant/list.rs
@@ -14,7 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
use crate::utils::{
first_byte_from_slice, overflow_error, slice_from_slice,
slice_from_slice_at_offset,
};
diff --git a/parquet-variant/src/variant/metadata.rs
b/parquet-variant/src/variant/metadata.rs
index 604ee0e890..9f9688acd0 100644
--- a/parquet-variant/src/variant/metadata.rs
+++ b/parquet-variant/src/variant/metadata.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
use crate::utils::{
first_byte_from_slice, overflow_error, slice_from_slice, string_from_slice,
try_binary_search_range_by,
@@ -285,14 +285,13 @@ impl<'m> VariantMetadata<'m> {
let mut current_offset = offsets.next().unwrap_or(0);
let mut prev_value: Option<&str> = None;
for next_offset in offsets {
- let current_value =
- value_buffer
- .get(current_offset..next_offset)
- .ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!(
+ let current_value =
value_buffer.get(current_offset..next_offset).ok_or_else(
+ || {
+ ArrowError::InvalidArgumentError(format!(
"range {current_offset}..{next_offset} is
invalid or out of bounds"
))
- })?;
+ },
+ )?;
if let Some(prev_val) = prev_value {
if current_value <= prev_val {
diff --git a/parquet-variant/src/variant/object.rs
b/parquet-variant/src/variant/object.rs
index 713be977b9..4e6faa7ab2 100644
--- a/parquet-variant/src/variant/object.rs
+++ b/parquet-variant/src/variant/object.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
use crate::utils::{
first_byte_from_slice, overflow_error, slice_from_slice,
try_binary_search_range_by,
};
diff --git a/parquet-variant/tests/variant_interop.rs
b/parquet-variant/tests/variant_interop.rs
index 00c326c064..d931d39293 100644
--- a/parquet-variant/tests/variant_interop.rs
+++ b/parquet-variant/tests/variant_interop.rs
@@ -23,7 +23,7 @@ use std::{env, fs};
use chrono::{DateTime, NaiveDate, NaiveTime};
use parquet_variant::{
- ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4,
VariantDecimal8,
+ ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8,
VariantDecimal16,
};
use rand::rngs::StdRng;
@@ -109,14 +109,29 @@ fn get_primitive_cases() -> Vec<(&'static str,
Variant<'static, 'static>)> {
// Cases are commented out
// Enabling is tracked in https://github.com/apache/arrow-rs/issues/7630
vec![
- ("primitive_binary", Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad,
0xbe, 0xef, 0xca, 0xfe])),
+ (
+ "primitive_binary",
+ Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca,
0xfe]),
+ ),
("primitive_boolean_false", Variant::BooleanFalse),
("primitive_boolean_true", Variant::BooleanTrue),
- ("primitive_date", Variant::Date(NaiveDate::from_ymd_opt(2025, 4 ,
16).unwrap())),
- ("primitive_decimal4", Variant::from(VariantDecimal4::try_new(1234i32,
2u8).unwrap())),
+ (
+ "primitive_date",
+ Variant::Date(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap()),
+ ),
+ (
+ "primitive_decimal4",
+ Variant::from(VariantDecimal4::try_new(1234i32, 2u8).unwrap()),
+ ),
// ("primitive_decimal8", Variant::Decimal8{integer: 1234567890,
scale: 2}),
- ("primitive_decimal8",
Variant::Decimal8(VariantDecimal8::try_new(1234567890,2).unwrap())),
- ("primitive_decimal16",
Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890, 2).unwrap())),
+ (
+ "primitive_decimal8",
+ Variant::Decimal8(VariantDecimal8::try_new(1234567890,
2).unwrap()),
+ ),
+ (
+ "primitive_decimal16",
+ Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890,
2).unwrap()),
+ ),
("primitive_float", Variant::Float(1234567890.1234)),
("primitive_double", Variant::Double(1234567890.1234)),
("primitive_int8", Variant::Int8(42)),
@@ -124,14 +139,64 @@ fn get_primitive_cases() -> Vec<(&'static str,
Variant<'static, 'static>)> {
("primitive_int32", Variant::Int32(123456)),
("primitive_int64", Variant::Int64(1234567890123456789)),
("primitive_null", Variant::Null),
- ("primitive_string", Variant::String("This string is longer than 64
bytes and therefore does not fit in a short_string and it also includes several
non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!")),
- ("primitive_timestamp",
Variant::TimestampMicros(NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_milli_opt(16, 34, 56, 780).unwrap().and_utc())),
- ("primitive_timestampntz",
Variant::TimestampNtzMicros(NaiveDate::from_ymd_opt(2025, 4,
16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap())),
- ("primitive_timestamp_nanos",
Variant::TimestampNanos(NaiveDate::from_ymd_opt(2024, 11,
7).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc())),
- ("primitive_timestampntz_nanos",
Variant::TimestampNtzNanos(NaiveDate::from_ymd_opt(2024, 11,
7).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap())),
- ("primitive_uuid",
Variant::Uuid(Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap())),
- ("short_string", Variant::ShortString(ShortString::try_new("Less than
64 bytes (❤\u{fe0f} with utf8)").unwrap())),
- ("primitive_time", Variant::Time(NaiveTime::from_hms_micro_opt(12, 33,
54, 123456).unwrap())),
+ (
+ "primitive_string",
+ Variant::String(
+ "This string is longer than 64 bytes and therefore does not
fit in a short_string and it also includes several non ascii characters such as
🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!",
+ ),
+ ),
+ (
+ "primitive_timestamp",
+ Variant::TimestampMicros(
+ NaiveDate::from_ymd_opt(2025, 4, 16)
+ .unwrap()
+ .and_hms_milli_opt(16, 34, 56, 780)
+ .unwrap()
+ .and_utc(),
+ ),
+ ),
+ (
+ "primitive_timestampntz",
+ Variant::TimestampNtzMicros(
+ NaiveDate::from_ymd_opt(2025, 4, 16)
+ .unwrap()
+ .and_hms_milli_opt(12, 34, 56, 780)
+ .unwrap(),
+ ),
+ ),
+ (
+ "primitive_timestamp_nanos",
+ Variant::TimestampNanos(
+ NaiveDate::from_ymd_opt(2024, 11, 7)
+ .unwrap()
+ .and_hms_nano_opt(12, 33, 54, 123456789)
+ .unwrap()
+ .and_utc(),
+ ),
+ ),
+ (
+ "primitive_timestampntz_nanos",
+ Variant::TimestampNtzNanos(
+ NaiveDate::from_ymd_opt(2024, 11, 7)
+ .unwrap()
+ .and_hms_nano_opt(12, 33, 54, 123456789)
+ .unwrap(),
+ ),
+ ),
+ (
+ "primitive_uuid",
+
Variant::Uuid(Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap()),
+ ),
+ (
+ "short_string",
+ Variant::ShortString(
+ ShortString::try_new("Less than 64 bytes (❤\u{fe0f} with
utf8)").unwrap(),
+ ),
+ ),
+ (
+ "primitive_time",
+ Variant::Time(NaiveTime::from_hms_micro_opt(12, 33, 54,
123456).unwrap()),
+ ),
]
}
#[test]