This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 5993dffb71 Migrate `parquet-variant` to Rust 2024 (#8510)
5993dffb71 is described below

commit 5993dffb714e47a720f810cc80b04176b591930a
Author: Matthijs Brobbel <[email protected]>
AuthorDate: Tue Oct 7 13:38:13 2025 +0200

    Migrate `parquet-variant` to Rust 2024 (#8510)
    
    # Which issue does this PR close?
    
    - Contribute to #6827
    
    # Rationale for this change
    
    Splitting up #8227.
    
    # What changes are included in this PR?
    
    Migrate `parquet-variant` to Rust 2024
    
    # Are these changes tested?
    
    CI
    
    # Are there any user-facing changes?
    
    Yes
---
 parquet-variant/Cargo.toml                 |  2 +-
 parquet-variant/benches/variant_builder.rs |  4 +-
 parquet-variant/src/builder.rs             | 12 ++--
 parquet-variant/src/decoder.rs             |  4 +-
 parquet-variant/src/variant.rs             |  6 +-
 parquet-variant/src/variant/decimal.rs     | 90 +++++++++++++++++------------
 parquet-variant/src/variant/list.rs        |  2 +-
 parquet-variant/src/variant/metadata.rs    | 13 ++---
 parquet-variant/src/variant/object.rs      |  2 +-
 parquet-variant/tests/variant_interop.rs   | 93 +++++++++++++++++++++++++-----
 10 files changed, 156 insertions(+), 72 deletions(-)

diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml
index f1282e8cda..b1985e5f35 100644
--- a/parquet-variant/Cargo.toml
+++ b/parquet-variant/Cargo.toml
@@ -27,7 +27,7 @@ repository = { workspace = true }
 authors = { workspace = true }
 keywords = ["arrow", "parquet", "variant"]
 readme = "README.md"
-edition = { workspace = true }
+edition = "2024"
 rust-version = { workspace = true }
 
 [dependencies]
diff --git a/parquet-variant/benches/variant_builder.rs 
b/parquet-variant/benches/variant_builder.rs
index 5d00cc054e..420fa583ee 100644
--- a/parquet-variant/benches/variant_builder.rs
+++ b/parquet-variant/benches/variant_builder.rs
@@ -21,9 +21,9 @@ use criterion::*;
 
 use parquet_variant::{Variant, VariantBuilder};
 use rand::{
-    distr::{uniform::SampleUniform, Alphanumeric},
-    rngs::StdRng,
     Rng, SeedableRng,
+    distr::{Alphanumeric, uniform::SampleUniform},
+    rngs::StdRng,
 };
 use std::{hint, ops::Range};
 
diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index 17455fc4bf..d44e9b10a3 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -16,7 +16,7 @@
 // under the License.
 use crate::decoder::{VariantBasicType, VariantPrimitiveType};
 use crate::{
-    ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, 
VariantList,
+    ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, 
VariantList,
     VariantMetadata, VariantObject,
 };
 use arrow_schema::ArrowError;
@@ -3403,10 +3403,12 @@ mod tests {
             // This should fail because "unknown_field" is not in the metadata
             let result = obj.try_insert("unknown_field", "value");
             assert!(result.is_err());
-            assert!(result
-                .unwrap_err()
-                .to_string()
-                .contains("Field name 'unknown_field' not found"));
+            assert!(
+                result
+                    .unwrap_err()
+                    .to_string()
+                    .contains("Field name 'unknown_field' not found")
+            );
         }
     }
 
diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs
index 26b4e204fa..8cf3cec112 100644
--- a/parquet-variant/src/decoder.rs
+++ b/parquet-variant/src/decoder.rs
@@ -14,10 +14,10 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
+use crate::ShortString;
 use crate::utils::{
     array_from_slice, overflow_error, slice_from_slice_at_offset, 
string_from_slice,
 };
-use crate::ShortString;
 
 use arrow_schema::ArrowError;
 use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, Utc};
@@ -143,7 +143,7 @@ impl OffsetSizeBytes {
             _ => {
                 return Err(ArrowError::InvalidArgumentError(
                     "offset_size_minus_one must be 0–3".to_string(),
-                ))
+                ));
             }
         };
         Ok(result)
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 849947675b..5663ab155c 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -15,9 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
+pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16};
 pub use self::list::VariantList;
-pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, 
EMPTY_VARIANT_METADATA_BYTES};
+pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, 
VariantMetadata};
 pub use self::object::VariantObject;
 
 // Publically export types used in the API
@@ -25,7 +25,7 @@ pub use half::f16;
 pub use uuid::Uuid;
 
 use crate::decoder::{
-    self, get_basic_type, get_primitive_type, VariantBasicType, 
VariantPrimitiveType,
+    self, VariantBasicType, VariantPrimitiveType, get_basic_type, 
get_primitive_type,
 };
 use crate::path::{VariantPath, VariantPathElement};
 use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice};
diff --git a/parquet-variant/src/variant/decimal.rs 
b/parquet-variant/src/variant/decimal.rs
index 4793d88569..b0b7d36ed1 100644
--- a/parquet-variant/src/variant/decimal.rs
+++ b/parquet-variant/src/variant/decimal.rs
@@ -285,20 +285,24 @@ mod tests {
             decimal4_too_large.is_err(),
             "Decimal4 precision overflow should fail"
         );
-        assert!(decimal4_too_large
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal4_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         let decimal4_too_small = VariantDecimal4::try_new(-1_000_000_000_i32, 
2);
         assert!(
             decimal4_too_small.is_err(),
             "Decimal4 precision underflow should fail"
         );
-        assert!(decimal4_too_small
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal4_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         // Test valid edge cases for Decimal4
         let decimal4_max_valid = VariantDecimal4::try_new(999_999_999_i32, 2);
@@ -319,20 +323,24 @@ mod tests {
             decimal8_too_large.is_err(),
             "Decimal8 precision overflow should fail"
         );
-        assert!(decimal8_too_large
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal8_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         let decimal8_too_small = 
VariantDecimal8::try_new(-1_000_000_000_000_000_000_i64, 2);
         assert!(
             decimal8_too_small.is_err(),
             "Decimal8 precision underflow should fail"
         );
-        assert!(decimal8_too_small
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal8_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         // Test valid edge cases for Decimal8
         let decimal8_max_valid = 
VariantDecimal8::try_new(999_999_999_999_999_999_i64, 2);
@@ -354,10 +362,12 @@ mod tests {
             decimal16_too_large.is_err(),
             "Decimal16 precision overflow should fail"
         );
-        assert!(decimal16_too_large
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal16_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         let decimal16_too_small =
             
VariantDecimal16::try_new(-100000000000000000000000000000000000000_i128, 2);
@@ -365,10 +375,12 @@ mod tests {
             decimal16_too_small.is_err(),
             "Decimal16 precision underflow should fail"
         );
-        assert!(decimal16_too_small
-            .unwrap_err()
-            .to_string()
-            .contains("wider than max precision"));
+        assert!(
+            decimal16_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
 
         // Test valid edge cases for Decimal16
         let decimal16_max_valid =
@@ -394,10 +406,12 @@ mod tests {
             decimal4_invalid_scale.is_err(),
             "Decimal4 with scale > 9 should fail"
         );
-        assert!(decimal4_invalid_scale
-            .unwrap_err()
-            .to_string()
-            .contains("larger than max precision"));
+        assert!(
+            decimal4_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
 
         let decimal4_invalid_scale_large = VariantDecimal4::try_new(123_i32, 
20);
         assert!(
@@ -418,10 +432,12 @@ mod tests {
             decimal8_invalid_scale.is_err(),
             "Decimal8 with scale > 18 should fail"
         );
-        assert!(decimal8_invalid_scale
-            .unwrap_err()
-            .to_string()
-            .contains("larger than max precision"));
+        assert!(
+            decimal8_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
 
         let decimal8_invalid_scale_large = VariantDecimal8::try_new(123_i64, 
25);
         assert!(
@@ -442,10 +458,12 @@ mod tests {
             decimal16_invalid_scale.is_err(),
             "Decimal16 with scale > 38 should fail"
         );
-        assert!(decimal16_invalid_scale
-            .unwrap_err()
-            .to_string()
-            .contains("larger than max precision"));
+        assert!(
+            decimal16_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
 
         let decimal16_invalid_scale_large = 
VariantDecimal16::try_new(123_i128, 50);
         assert!(
diff --git a/parquet-variant/src/variant/list.rs 
b/parquet-variant/src/variant/list.rs
index d14d3a7796..a150295717 100644
--- a/parquet-variant/src/variant/list.rs
+++ b/parquet-variant/src/variant/list.rs
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
     first_byte_from_slice, overflow_error, slice_from_slice, 
slice_from_slice_at_offset,
 };
diff --git a/parquet-variant/src/variant/metadata.rs 
b/parquet-variant/src/variant/metadata.rs
index 604ee0e890..9f9688acd0 100644
--- a/parquet-variant/src/variant/metadata.rs
+++ b/parquet-variant/src/variant/metadata.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
     first_byte_from_slice, overflow_error, slice_from_slice, string_from_slice,
     try_binary_search_range_by,
@@ -285,14 +285,13 @@ impl<'m> VariantMetadata<'m> {
                 let mut current_offset = offsets.next().unwrap_or(0);
                 let mut prev_value: Option<&str> = None;
                 for next_offset in offsets {
-                    let current_value =
-                        value_buffer
-                            .get(current_offset..next_offset)
-                            .ok_or_else(|| {
-                                ArrowError::InvalidArgumentError(format!(
+                    let current_value = 
value_buffer.get(current_offset..next_offset).ok_or_else(
+                        || {
+                            ArrowError::InvalidArgumentError(format!(
                                 "range {current_offset}..{next_offset} is 
invalid or out of bounds"
                             ))
-                            })?;
+                        },
+                    )?;
 
                     if let Some(prev_val) = prev_value {
                         if current_value <= prev_val {
diff --git a/parquet-variant/src/variant/object.rs 
b/parquet-variant/src/variant/object.rs
index 713be977b9..4e6faa7ab2 100644
--- a/parquet-variant/src/variant/object.rs
+++ b/parquet-variant/src/variant/object.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
     first_byte_from_slice, overflow_error, slice_from_slice, 
try_binary_search_range_by,
 };
diff --git a/parquet-variant/tests/variant_interop.rs 
b/parquet-variant/tests/variant_interop.rs
index 00c326c064..d931d39293 100644
--- a/parquet-variant/tests/variant_interop.rs
+++ b/parquet-variant/tests/variant_interop.rs
@@ -23,7 +23,7 @@ use std::{env, fs};
 
 use chrono::{DateTime, NaiveDate, NaiveTime};
 use parquet_variant::{
-    ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, 
VariantDecimal8,
+    ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8, 
VariantDecimal16,
 };
 
 use rand::rngs::StdRng;
@@ -109,14 +109,29 @@ fn get_primitive_cases() -> Vec<(&'static str, 
Variant<'static, 'static>)> {
     // Cases are commented out
     // Enabling is tracked in  https://github.com/apache/arrow-rs/issues/7630
     vec![
-        ("primitive_binary", Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad, 
0xbe, 0xef, 0xca, 0xfe])),
+        (
+            "primitive_binary",
+            Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 
0xfe]),
+        ),
         ("primitive_boolean_false", Variant::BooleanFalse),
         ("primitive_boolean_true", Variant::BooleanTrue),
-        ("primitive_date", Variant::Date(NaiveDate::from_ymd_opt(2025, 4 , 
16).unwrap())),
-        ("primitive_decimal4", Variant::from(VariantDecimal4::try_new(1234i32, 
2u8).unwrap())),
+        (
+            "primitive_date",
+            Variant::Date(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap()),
+        ),
+        (
+            "primitive_decimal4",
+            Variant::from(VariantDecimal4::try_new(1234i32, 2u8).unwrap()),
+        ),
         // ("primitive_decimal8", Variant::Decimal8{integer: 1234567890, 
scale: 2}),
-        ("primitive_decimal8", 
Variant::Decimal8(VariantDecimal8::try_new(1234567890,2).unwrap())),
-        ("primitive_decimal16", 
Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890, 2).unwrap())),
+        (
+            "primitive_decimal8",
+            Variant::Decimal8(VariantDecimal8::try_new(1234567890, 
2).unwrap()),
+        ),
+        (
+            "primitive_decimal16",
+            Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890, 
2).unwrap()),
+        ),
         ("primitive_float", Variant::Float(1234567890.1234)),
         ("primitive_double", Variant::Double(1234567890.1234)),
         ("primitive_int8", Variant::Int8(42)),
@@ -124,14 +139,64 @@ fn get_primitive_cases() -> Vec<(&'static str, 
Variant<'static, 'static>)> {
         ("primitive_int32", Variant::Int32(123456)),
         ("primitive_int64", Variant::Int64(1234567890123456789)),
         ("primitive_null", Variant::Null),
-        ("primitive_string", Variant::String("This string is longer than 64 
bytes and therefore does not fit in a short_string and it also includes several 
non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!")),
-        ("primitive_timestamp", 
Variant::TimestampMicros(NaiveDate::from_ymd_opt(2025, 4, 
16).unwrap().and_hms_milli_opt(16, 34, 56, 780).unwrap().and_utc())),
-        ("primitive_timestampntz", 
Variant::TimestampNtzMicros(NaiveDate::from_ymd_opt(2025, 4, 
16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap())),
-        ("primitive_timestamp_nanos", 
Variant::TimestampNanos(NaiveDate::from_ymd_opt(2024, 11, 
7).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc())),
-        ("primitive_timestampntz_nanos", 
Variant::TimestampNtzNanos(NaiveDate::from_ymd_opt(2024, 11, 
7).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap())),
-        ("primitive_uuid", 
Variant::Uuid(Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap())),
-        ("short_string", Variant::ShortString(ShortString::try_new("Less than 
64 bytes (❤\u{fe0f} with utf8)").unwrap())),
-        ("primitive_time", Variant::Time(NaiveTime::from_hms_micro_opt(12, 33, 
54, 123456).unwrap())),
+        (
+            "primitive_string",
+            Variant::String(
+                "This string is longer than 64 bytes and therefore does not 
fit in a short_string and it also includes several non ascii characters such as 
🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!",
+            ),
+        ),
+        (
+            "primitive_timestamp",
+            Variant::TimestampMicros(
+                NaiveDate::from_ymd_opt(2025, 4, 16)
+                    .unwrap()
+                    .and_hms_milli_opt(16, 34, 56, 780)
+                    .unwrap()
+                    .and_utc(),
+            ),
+        ),
+        (
+            "primitive_timestampntz",
+            Variant::TimestampNtzMicros(
+                NaiveDate::from_ymd_opt(2025, 4, 16)
+                    .unwrap()
+                    .and_hms_milli_opt(12, 34, 56, 780)
+                    .unwrap(),
+            ),
+        ),
+        (
+            "primitive_timestamp_nanos",
+            Variant::TimestampNanos(
+                NaiveDate::from_ymd_opt(2024, 11, 7)
+                    .unwrap()
+                    .and_hms_nano_opt(12, 33, 54, 123456789)
+                    .unwrap()
+                    .and_utc(),
+            ),
+        ),
+        (
+            "primitive_timestampntz_nanos",
+            Variant::TimestampNtzNanos(
+                NaiveDate::from_ymd_opt(2024, 11, 7)
+                    .unwrap()
+                    .and_hms_nano_opt(12, 33, 54, 123456789)
+                    .unwrap(),
+            ),
+        ),
+        (
+            "primitive_uuid",
+            
Variant::Uuid(Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap()),
+        ),
+        (
+            "short_string",
+            Variant::ShortString(
+                ShortString::try_new("Less than 64 bytes (❤\u{fe0f} with 
utf8)").unwrap(),
+            ),
+        ),
+        (
+            "primitive_time",
+            Variant::Time(NaiveTime::from_hms_micro_opt(12, 33, 54, 
123456).unwrap()),
+        ),
     ]
 }
 #[test]

Reply via email to