This is an automated email from the ASF dual-hosted git repository.

mbrobbel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
     new 9c0cb9a56f chore: bump MSRV to 1.84 (#7926)
9c0cb9a56f is described below

commit 9c0cb9a56f0099e7d39087826d7e409ce0f1bf5f
Author:     Matthijs Brobbel <m1brob...@gmail.com>
AuthorDate: Tue Jul 15 10:24:15 2025 +0200

    chore: bump MSRV to 1.84 (#7926)

    # Which issue does this PR close?

    None.

    # Rationale for this change

    - This allows us to keep up with dependencies bumping their MSRV (e.g. #7924)
    - The parquet variant crates now use the workspace MSRV
    - #7395 is the next release, and because it is a major release we can bump the MSRV now for all of the 56.x.y releases

    We can bump to 1.85 in #7835 to unblock #7270.

    # What changes are included in this PR?

    - Bump the MSRV to 1.84, which was released more than 6 months ago
    - Remove the `half` pins from CI

    # Are these changes tested?

    CI.

    # Are there any user-facing changes?

    Yes.
---
 .github/workflows/rust.yml                   | 14 +-------------
 Cargo.toml                                   |  4 ++--
 README.md                                    |  6 +-----
 arrow-array/benches/union_array.rs           | 11 +++--------
 arrow-array/src/arithmetic.rs                |  8 ++++----
 arrow-array/src/array/list_array.rs          |  2 +-
 arrow-array/src/array/list_view_array.rs     |  2 +-
 arrow-avro/src/reader/record.rs              |  2 +-
 arrow-buffer/src/buffer/immutable.rs         |  6 +++---
 arrow-buffer/src/builder/mod.rs              |  4 ++--
 arrow-cast/src/cast/list.rs                  |  2 +-
 arrow-cast/src/cast/mod.rs                   |  2 +-
 arrow-data/src/data.rs                       |  2 +-
 arrow-flight/src/encode.rs                   |  4 ++--
 arrow-ord/src/sort.rs                        |  2 +-
 arrow-pyarrow-integration-testing/Cargo.toml |  2 +-
 arrow-pyarrow-testing/Cargo.toml             |  4 ++--
 arrow-select/src/coalesce.rs                 |  2 +-
 arrow-select/src/concat.rs                   |  6 +++---
 arrow-select/src/filter.rs                   | 11 +++++------
 arrow/benches/array_data_validate.rs         |  2 +-
 arrow/benches/partition_kernels.rs           | 11 +++++------
 arrow/benches/string_run_iterator.rs         |  2 +-
 arrow/src/util/bench_util.rs                 |  4 ++--
 parquet-variant-compute/Cargo.toml           |  3 +--
 parquet-variant-json/Cargo.toml              |  3 +--
 parquet-variant/Cargo.toml                   |  4 +---
 parquet/src/arrow/arrow_reader/statistics.rs | 28 ++++++++++++++++------------
 parquet/src/arrow/arrow_writer/byte_array.rs |  4 ++--
 parquet/src/arrow/arrow_writer/levels.rs     | 12 ++++++------
 parquet/src/arrow/arrow_writer/mod.rs        |  8 ++++----
 parquet/src/arrow/buffer/offset_buffer.rs    |  2 +-
 parquet/src/column/writer/mod.rs             |  2 +-
 parquet/src/encodings/rle.rs                 |  2 +-
 parquet/src/file/metadata/writer.rs          |  4 ++--
 parquet/src/file/serialized_reader.rs        |  3 +--
 parquet/src/util/bit_util.rs                 |  2 +-
 parquet/tests/arrow_reader/mod.rs            |  4 ++--
 parquet/tests/arrow_reader/statistics.rs     |  2 +-
 39 files changed, 87 insertions(+), 111 deletions(-)
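The bulk of the diff below is mechanical: `std::iter::repeat(x).take(n)` becomes `std::iter::repeat_n(x, n)` (stable since Rust 1.82), and `Option::map_or(true, f)` becomes `Option::is_none_or(f)` (stable since Rust 1.84, hence the MSRV bump). A minimal standalone sketch of the two equivalences — illustrative only, not part of the commit; it needs Rust 1.84+ to compile:

```rust
fn main() {
    // repeat_n yields `x` exactly `n` times, like repeat(x).take(n),
    // but states the count up front instead of truncating an
    // unbounded iterator.
    let a: Vec<i32> = std::iter::repeat(7).take(3).collect();
    let b: Vec<i32> = std::iter::repeat_n(7, 3).collect();
    assert_eq!(a, b);

    // is_none_or(f) is true when the option is None or when f holds for
    // the contained value -- the same truth table as map_or(true, f).
    let empty: Option<i32> = None;
    assert!(empty.is_none_or(|v| v > 0));
    assert_eq!(Some(5).is_none_or(|v| v > 0), Some(5).map_or(true, |v| v > 0));
}
```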
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index e4ffb10a11..38cccdec3c 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -30,7 +30,6 @@ on:
   pull_request:
 
 jobs:
-
   # Check workspace wide compile and test with default features for
   # mac
   macos:
@@ -54,7 +53,6 @@ jobs:
         # PyArrow tests happen in integration.yml.
           cargo test --workspace
-
   # Check workspace wide compile and test with default features for
   # windows
   windows:
@@ -84,8 +82,7 @@ jobs:
           # do not produce debug symbols to keep memory usage down
           export RUSTFLAGS="-C debuginfo=0"
           export PATH=$PATH:/d/protoc/bin
-          cargo test --workspace
-
+          cargo test --workspace
 
   # Run cargo fmt for all crates
   lint:
@@ -121,15 +118,6 @@ jobs:
         uses: ./.github/actions/setup-builder
       - name: Install cargo-msrv
         run: cargo install cargo-msrv
-      - name: Downgrade arrow-pyarrow-integration-testing dependencies
-        working-directory: arrow-pyarrow-integration-testing
-        # Necessary because half 2.5 requires rust 1.81 or newer
-        run: |
-          cargo update -p half --precise 2.4.0
-      - name: Downgrade workspace dependencies
-        # Necessary because half 2.5 requires rust 1.81 or newer
-        run: |
-          cargo update -p half --precise 2.4.0
       - name: Check all packages
         run: |
           # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies
diff --git a/Cargo.toml b/Cargo.toml
index 30261cf607..73c0f7058b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -81,7 +81,7 @@ include = [
   "NOTICE.txt",
 ]
 edition = "2021"
-rust-version = "1.81"
+rust-version = "1.84"
 
 [workspace.dependencies]
 arrow = { version = "55.2.0", path = "./arrow", default-features = false }
@@ -102,7 +102,7 @@ arrow-string = { version = "55.2.0", path = "./arrow-string" }
 parquet = { version = "55.2.0", path = "./parquet", default-features = false }
 
 # These crates have not yet been released and thus do not use the workspace version
-parquet-variant = { version = "0.1.0", path = "./parquet-variant"}
+parquet-variant = { version = "0.1.0", path = "./parquet-variant" }
 parquet-variant-json = { version = "0.1.0", path = "./parquet-variant-json" }
 parquet-variant-compute = { version = "0.1.0", path = "./parquet-variant-json" }
diff --git a/README.md b/README.md
index cdaaf7fb80..7e7b3b6cf0 100644
--- a/README.md
+++ b/README.md
@@ -79,14 +79,10 @@ Planned Release Schedule
 
 ### Rust Version Compatibility Policy
 
-arrow-rs, parquet and object_store are built and tested with stable Rust, and will keep a rolling MSRV (minimum supported Rust version) that can only be updated in major releases on a need by basis (e.g. project dependencies bump their MSRV or a particular Rust feature is useful for us etc.). The new MSRV if selected will be at least 6 months old. The minor releases are guaranteed to have the same MSRV.
+arrow-rs and parquet are built and tested with stable Rust, and will keep a rolling MSRV (minimum supported Rust version) that can only be updated in major releases on a need by basis (e.g. project dependencies bump their MSRV or a particular Rust feature is useful for us etc.). The new MSRV if selected will be at least 6 months old. The minor releases are guaranteed to have the same MSRV.
 
 Note: If a Rust hotfix is released for the current MSRV, the MSRV will be updated to the specific minor version that includes all applicable hotfixes preceding other policies.
 
-E.g.
-
-in Apr 2025 we will release version 55.0.0 which might have a version bump. But the Rust version selected in this case will be at most version 1.81.
-
 ### Guidelines for `panic` vs `Result`
 
 In general, use panics for bad states that are unreachable, unrecoverable or harmful.
diff --git a/arrow-array/benches/union_array.rs b/arrow-array/benches/union_array.rs
index f3894e249f..d63eb9e434 100644
--- a/arrow-array/benches/union_array.rs
+++ b/arrow-array/benches/union_array.rs
@@ -15,11 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{
-    hint,
-    iter::{repeat, repeat_with},
-    sync::Arc,
-};
+use std::{hint, iter::repeat_with, sync::Arc};
 
 use arrow_array::{Array, ArrayRef, Int32Array, UnionArray};
 use arrow_buffer::{NullBuffer, ScalarBuffer};
@@ -67,9 +63,8 @@ fn criterion_benchmark(c: &mut Criterion) {
         fields,
         type_ids.cycle().take(4096).collect(),
         None,
-        repeat(array_with_nulls())
-            .take(with_nulls as usize)
-            .chain(repeat(array_without_nulls()).take(without_nulls as usize))
+        std::iter::repeat_n(array_with_nulls(), with_nulls as usize)
+            .chain(std::iter::repeat_n(array_without_nulls(), without_nulls as usize))
             .collect(),
     )
     .unwrap();
diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs
index 38717807b7..031864cb08 100644
--- a/arrow-array/src/arithmetic.rs
+++ b/arrow-array/src/arithmetic.rs
@@ -420,13 +420,13 @@ native_type_float_op!(
     1.,
     unsafe {
         // Need to allow in clippy because
-        // current MSRV (Minimum Supported Rust Version) is `1.81.0` but this item is stable since `1.87.0`
+        // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
         #[allow(unnecessary_transmutes)]
         std::mem::transmute(-1_i32)
     },
     unsafe {
         // Need to allow in clippy because
-        // current MSRV (Minimum Supported Rust Version) is `1.81.0` but this item is stable since `1.87.0`
+        // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
         #[allow(unnecessary_transmutes)]
         std::mem::transmute(i32::MAX)
     }
@@ -437,13 +437,13 @@ native_type_float_op!(
     1.,
     unsafe {
         // Need to allow in clippy because
-        // current MSRV (Minimum Supported Rust Version) is `1.81.0` but this item is stable since `1.87.0`
+        // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
         #[allow(unnecessary_transmutes)]
         std::mem::transmute(-1_i64)
     },
     unsafe {
         // Need to allow in clippy because
-        // current MSRV (Minimum Supported Rust Version) is `1.81.0` but this item is stable since `1.87.0`
+        // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
         #[allow(unnecessary_transmutes)]
         std::mem::transmute(i64::MAX)
     }
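Context for the `arrow-array/src/arithmetic.rs` hunks above: the transmutes reinterpret integer bit patterns as floats (the all-ones and `MAX` patterns decode to the NaNs that sit at the extremes of IEEE-754 total ordering), and newer toolchains flag such transmutes via the `unnecessary_transmutes` lint because a safe spelling exists. A hedged sketch of the equivalence — illustrative only, not part of the commit:

```rust
fn main() {
    // SAFETY: i32 and f32 are the same size, and every 32-bit pattern is a
    // valid f32, so the reinterpretation itself is sound.
    let via_transmute: f32 = unsafe { std::mem::transmute(-1_i32) };

    // The safe spelling of the same reinterpretation; newer compilers
    // suggest this instead of the transmute above.
    let via_from_bits = f32::from_bits(-1_i32 as u32);

    // Both produce the all-ones bit pattern, which decodes to a NaN.
    assert_eq!(via_transmute.to_bits(), via_from_bits.to_bits());
    assert!(via_transmute.is_nan());
}
```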
diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index 7962777656..832a1c0a9a 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -454,7 +454,7 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<
             _ => unreachable!(),
         };
 
-        let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
+        let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(size, value.len()));
 
         Self {
             data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs
index 6118607bcb..a239ea1e5e 100644
--- a/arrow-array/src/array/list_view_array.rs
+++ b/arrow-array/src/array/list_view_array.rs
@@ -475,7 +475,7 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewAr
             _ => unreachable!(),
         };
         let mut acc = 0_usize;
-        let iter = std::iter::repeat(size).take(value.len());
+        let iter = std::iter::repeat_n(size, value.len());
         let mut sizes = Vec::with_capacity(iter.size_hint().0);
         let mut offsets = Vec::with_capacity(iter.size_hint().0);
diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs
index 0a4d47ad24..2ef382a226 100644
--- a/arrow-avro/src/reader/record.rs
+++ b/arrow-avro/src/reader/record.rs
@@ -338,7 +338,7 @@ impl Decoder {
                 moff.push_length(0);
             }
             Self::Fixed(sz, accum) => {
-                accum.extend(std::iter::repeat(0u8).take(*sz as usize));
+                accum.extend(std::iter::repeat_n(0u8, *sz as usize));
             }
             Self::Decimal128(_, _, _, builder) => builder.append_value(0),
             Self::Decimal256(_, _, _, builder) => builder.append_value(i256::ZERO),
diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs
index aedfe97468..2b55bf6604 100644
--- a/arrow-buffer/src/buffer/immutable.rs
+++ b/arrow-buffer/src/buffer/immutable.rs
@@ -997,13 +997,13 @@ mod tests {
     #[should_panic(expected = "capacity overflow")]
     fn test_from_iter_overflow() {
         let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
-        let _ = Buffer::from_iter(std::iter::repeat(0_u64).take(iter_len));
+        let _ = Buffer::from_iter(std::iter::repeat_n(0_u64, iter_len));
     }
 
     #[test]
     fn bit_slice_length_preserved() {
         // Create a boring buffer
-        let buf = Buffer::from_iter(std::iter::repeat(true).take(64));
+        let buf = Buffer::from_iter(std::iter::repeat_n(true, 64));
 
         let assert_preserved = |offset: usize, len: usize| {
             let new_buf = buf.bit_slice(offset, len);
@@ -1035,7 +1035,7 @@ mod tests {
 
     #[test]
     fn test_strong_count() {
-        let buffer = Buffer::from_iter(std::iter::repeat(0_u8).take(100));
+        let buffer = Buffer::from_iter(std::iter::repeat_n(0_u8, 100));
         assert_eq!(buffer.strong_count(), 1);
 
         let buffer2 = buffer.clone();
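One practical property visible in the `list_view_array.rs` hunk above: `repeat_n` is an exact-size iterator, so `size_hint().0` is the true length and can drive pre-allocation. A small sketch, illustrative only:

```rust
fn main() {
    let iter = std::iter::repeat_n(4_usize, 1_000);
    // The size hint is exact: (1000, Some(1000)).
    assert_eq!(iter.size_hint(), (1_000, Some(1_000)));

    // Mirrors the pattern in the GenericListViewArray conversion above.
    let mut sizes = Vec::with_capacity(iter.size_hint().0);
    sizes.extend(iter);
    assert_eq!(sizes.len(), 1_000);
}
```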
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index f7e0e29dac..abe510bdab 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -26,7 +26,7 @@ pub use null::*;
 pub use offset::*;
 
 use crate::{ArrowNativeType, Buffer, MutableBuffer};
-use std::{iter, marker::PhantomData};
+use std::marker::PhantomData;
 
 /// Builder for creating a [Buffer] object.
 ///
@@ -214,7 +214,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
     #[inline]
     pub fn append_n(&mut self, n: usize, v: T) {
         self.reserve(n);
-        self.extend(iter::repeat(v).take(n))
+        self.extend(std::iter::repeat_n(v, n))
     }
 
     /// Appends `n`, zero-initialized values
diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs
index ddcbca361b..1728cc4061 100644
--- a/arrow-cast/src/cast/list.rs
+++ b/arrow-cast/src/cast/list.rs
@@ -24,7 +24,7 @@ pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
     cast_options: &CastOptions,
 ) -> Result<ArrayRef, ArrowError> {
     let values = cast_with_options(array, to.data_type(), cast_options)?;
-    let offsets = OffsetBuffer::from_lengths(std::iter::repeat(1).take(values.len()));
+    let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(1, values.len()));
     let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
     Ok(Arc::new(list))
 }
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 884a32197c..d8cc514100 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -2167,7 +2167,7 @@ fn cast_numeric_to_binary<FROM: ArrowPrimitiveType, O: OffsetSizeTrait>(
 ) -> Result<ArrayRef, ArrowError> {
     let array = array.as_primitive::<FROM>();
     let size = std::mem::size_of::<FROM::Native>();
-    let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(array.len()));
+    let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(size, array.len()));
     Ok(Arc::new(GenericBinaryArray::<O>::new(
         offsets,
         array.values().inner().clone(),
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 473645d758..fca19bc3aa 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -638,7 +638,7 @@ impl ArrayData {
             ),
             DataType::Union(f, mode) => {
                 let (id, _) = f.iter().next().unwrap();
-                let ids = Buffer::from_iter(std::iter::repeat(id).take(len));
+                let ids = Buffer::from_iter(std::iter::repeat_n(id, len));
                 let buffers = match mode {
                     UnionMode::Sparse => vec![ids],
                     UnionMode::Dense => {
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index 57ac9f3173..0a7a6df904 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -1695,9 +1695,9 @@ mod tests {
 
     #[tokio::test]
     async fn flight_data_size_even() {
-        let s1 = StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024));
+        let s1 = StringArray::from_iter_values(std::iter::repeat_n(".10 bytes.", 1024));
         let i1 = Int16Array::from_iter_values(0..1024);
-        let s2 = StringArray::from_iter_values(std::iter::repeat("6bytes").take(1024));
+        let s2 = StringArray::from_iter_values(std::iter::repeat_n("6bytes", 1024));
         let i2 = Int64Array::from_iter_values(0..1024);
 
         let batch = RecordBatch::try_from_iter(vec![
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index b1b11ee0df..3a2d372e04 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -1791,7 +1791,7 @@ mod tests {
                 None => {
                     builder
                         .values()
-                        .extend(std::iter::repeat(None).take(fixed_length as usize));
+                        .extend(std::iter::repeat_n(None, fixed_length as usize));
                     builder.append(false);
                 }
             }
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml
index d7c7acd046..c757f67393 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -25,7 +25,7 @@ authors = ["Apache Arrow <d...@arrow.apache.org>"]
 license = "Apache-2.0"
 keywords = ["arrow"]
 edition = "2021"
-rust-version = "1.81"
+rust-version = "1.84"
 publish = false
 
 [lib]
diff --git a/arrow-pyarrow-testing/Cargo.toml b/arrow-pyarrow-testing/Cargo.toml
index 96c20d31bb..8bbf364f2e 100644
--- a/arrow-pyarrow-testing/Cargo.toml
+++ b/arrow-pyarrow-testing/Cargo.toml
@@ -38,9 +38,9 @@ homepage = "https://github.com/apache/arrow-rs"
 repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow <d...@arrow.apache.org>"]
 license = "Apache-2.0"
-keywords = [ "arrow" ]
+keywords = ["arrow"]
 edition = "2021"
-rust-version = "1.81"
+rust-version = "1.84"
 publish = false
diff --git a/arrow-select/src/coalesce.rs b/arrow-select/src/coalesce.rs
index fc7af1a332..2360f25354 100644
--- a/arrow-select/src/coalesce.rs
+++ b/arrow-select/src/coalesce.rs
@@ -730,7 +730,7 @@ mod tests {
         // The strings are designed to exactly fit into buffers that are powers of 2 long
         let batch = stringview_batch_repeated(100, [Some("This string is a power of two=32")]);
         let output_batches = Test::new()
-            .with_batches(std::iter::repeat(batch).take(20))
+            .with_batches(std::iter::repeat_n(batch, 20))
            .with_batch_size(900)
             .with_expected_output_sizes(vec![900, 900, 200])
             .run();
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index 0a64d0db35..6636988305 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -1335,7 +1335,7 @@ mod tests {
         assert_eq!(data.buffers()[0].len(), 120);
         assert_eq!(data.buffers()[0].capacity(), 128); // Nearest multiple of 64
 
-        let a = StringArray::from_iter_values(std::iter::repeat("foo").take(100));
+        let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
         let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);
 
         let a = concat(&[&a, &b]).unwrap();
@@ -1358,8 +1358,8 @@ mod tests {
         assert_eq!(data.buffers()[1].len(), 135);
         assert_eq!(data.buffers()[1].capacity(), 192); // Nearest multiple of 64
 
-        let a = LargeBinaryArray::from_iter_values(std::iter::repeat(b"foo").take(100));
-        let b = LargeBinaryArray::from_iter_values(std::iter::repeat(b"cupcakes").take(10));
+        let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
+        let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));
 
         let a = concat(&[&a, &b]).unwrap();
         let data = a.to_data();
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index ed003a58dc..641599cea6 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -1449,12 +1449,11 @@ mod tests {
     #[test]
     fn test_slices() {
         // takes up 2 u64s
-        let bools = std::iter::repeat(true)
-            .take(10)
-            .chain(std::iter::repeat(false).take(30))
-            .chain(std::iter::repeat(true).take(20))
-            .chain(std::iter::repeat(false).take(17))
-            .chain(std::iter::repeat(true).take(4));
+        let bools = std::iter::repeat_n(true, 10)
+            .chain(std::iter::repeat_n(false, 30))
+            .chain(std::iter::repeat_n(true, 20))
+            .chain(std::iter::repeat_n(false, 17))
+            .chain(std::iter::repeat_n(true, 4));
 
         let bool_array: BooleanArray = bools.map(Some).collect();
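The `filter.rs` test above shows how the runs compose: chained `repeat_n` calls read as a run-length description of the mask. A standalone sketch, assuming the `arrow` crate as a dependency — illustrative only:

```rust
use arrow::array::BooleanArray;

fn main() {
    // Three runs: 10 set bits, 30 clear bits, 20 set bits.
    let bools = std::iter::repeat_n(true, 10)
        .chain(std::iter::repeat_n(false, 30))
        .chain(std::iter::repeat_n(true, 20));

    // Collect into a BooleanArray the same way the test does.
    let bool_array: BooleanArray = bools.map(Some).collect();
    assert_eq!(bool_array.len(), 60);
    assert_eq!(bool_array.true_count(), 30);
}
```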
diff --git a/arrow/benches/array_data_validate.rs b/arrow/benches/array_data_validate.rs
index 531462f2d8..33d000d14b 100644
--- a/arrow/benches/array_data_validate.rs
+++ b/arrow/benches/array_data_validate.rs
@@ -53,7 +53,7 @@ fn validate_benchmark(c: &mut Criterion) {
         b.iter(|| validate_utf8_array(&str_arr))
     });
 
-    let byte_array = BinaryArray::from_iter_values(std::iter::repeat(b"test").take(20000));
+    let byte_array = BinaryArray::from_iter_values(std::iter::repeat_n(b"test", 20000));
     c.bench_function("byte_array_to_string_array 20000", |b| {
         b.iter(|| StringArray::from(BinaryArray::from(byte_array.to_data())))
     });
diff --git a/arrow/benches/partition_kernels.rs b/arrow/benches/partition_kernels.rs
index 82de6e0f00..8e3907d261 100644
--- a/arrow/benches/partition_kernels.rs
+++ b/arrow/benches/partition_kernels.rs
@@ -28,7 +28,7 @@ use arrow::{
 };
 use arrow_ord::partition::partition;
 use rand::distr::{Distribution, StandardUniform};
-use std::{hint, iter};
+use std::hint;
 
 fn create_array<T: ArrowPrimitiveType>(size: usize, with_nulls: bool) -> ArrayRef
 where
@@ -45,11 +45,10 @@ fn bench_partition(sorted_columns: &[ArrayRef]) {
 
 fn create_sorted_low_cardinality_data(length: usize) -> Vec<ArrayRef> {
     let arr = Int64Array::from_iter_values(
-        iter::repeat(1)
-            .take(length / 4)
-            .chain(iter::repeat(2).take(length / 4))
-            .chain(iter::repeat(3).take(length / 4))
-            .chain(iter::repeat(4).take(length / 4)),
+        std::iter::repeat_n(1, length / 4)
+            .chain(std::iter::repeat_n(2, length / 4))
+            .chain(std::iter::repeat_n(3, length / 4))
+            .chain(std::iter::repeat_n(4, length / 4)),
     );
     lexsort(
         &[SortColumn {
diff --git a/arrow/benches/string_run_iterator.rs b/arrow/benches/string_run_iterator.rs
index 32088573dc..9766f10b4d 100644
--- a/arrow/benches/string_run_iterator.rs
+++ b/arrow/benches/string_run_iterator.rs
@@ -29,7 +29,7 @@ fn build_strings_runs(
     let run_len = logical_array_len / physical_array_len;
     let mut values: Vec<String> = (0..physical_array_len)
         .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
-        .flat_map(|s| std::iter::repeat(s).take(run_len))
+        .flat_map(|s| std::iter::repeat_n(s, run_len))
         .collect();
     while values.len() < logical_array_len {
         let last_val = values[values.len() - 1].clone();
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index 521dc74877..1b7819001c 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -396,7 +396,7 @@ pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
                 take_len += 1;
                 run_len_extra -= 1;
             }
-            std::iter::repeat(V::Native::from_usize(s).unwrap()).take(take_len)
+            std::iter::repeat_n(V::Native::from_usize(s).unwrap(), take_len)
         })
         .collect();
     while values.len() < logical_array_len {
@@ -434,7 +434,7 @@ pub fn create_string_array_for_runs(
                 take_len += 1;
                 run_len_extra -= 1;
             }
-            std::iter::repeat(s).take(take_len)
+            std::iter::repeat_n(s, take_len)
         })
         .collect();
     while values.len() < logical_array_len {
diff --git a/parquet-variant-compute/Cargo.toml b/parquet-variant-compute/Cargo.toml
index a053803c55..c596a39045 100644
--- a/parquet-variant-compute/Cargo.toml
+++ b/parquet-variant-compute/Cargo.toml
@@ -27,8 +27,7 @@ repository = { workspace = true }
 authors = { workspace = true }
 keywords = ["arrow", "parquet", "variant"]
 edition = { workspace = true }
-# parquet-variant needs newer version than workspace
-rust-version = "1.83"
+rust-version = { workspace = true }
 
 [dependencies]
diff --git a/parquet-variant-json/Cargo.toml b/parquet-variant-json/Cargo.toml
index fed480afb4..76255f0681 100644
--- a/parquet-variant-json/Cargo.toml
+++ b/parquet-variant-json/Cargo.toml
@@ -28,8 +28,7 @@ authors = { workspace = true }
 keywords = ["arrow", "parquet", "variant"]
 readme = "README.md"
 edition = { workspace = true }
-# parquet-variant needs newer version than workspace
-rust-version = "1.83"
+rust-version = { workspace = true }
 
 [dependencies]
diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml
index 12fe609757..51fa4cc233 100644
--- a/parquet-variant/Cargo.toml
+++ b/parquet-variant/Cargo.toml
@@ -28,9 +28,7 @@ authors = { workspace = true }
 keywords = ["arrow", "parquet", "variant"]
 readme = "README.md"
 edition = { workspace = true }
-# needs a newer version than workspace due to
-# Error: `Option::<T>::unwrap` is not yet stable as a const fn
-rust-version = "1.83"
+rust-version = { workspace = true }
 
 [dependencies]
 arrow-schema = { workspace = true }
diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs
index b976955129..eba1f56120 100644
--- a/parquet/src/arrow/arrow_reader/statistics.rs
+++ b/parquet/src/arrow/arrow_reader/statistics.rs
@@ -1497,9 +1497,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = metadatas.into_iter().count();
-            return Ok(BooleanArray::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(BooleanArray::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let is_max_value_exact = metadatas
@@ -1518,9 +1519,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = metadatas.into_iter().count();
-            return Ok(BooleanArray::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(BooleanArray::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let is_min_value_exact = metadatas
@@ -1539,9 +1541,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = metadatas.into_iter().count();
-            return Ok(UInt64Array::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(UInt64Array::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let null_counts = metadatas
@@ -1683,9 +1686,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = row_group_indices.into_iter().count();
-            return Ok(UInt64Array::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(UInt64Array::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let iter = row_group_indices.into_iter().map(|rg_index| {
diff --git a/parquet/src/arrow/arrow_writer/byte_array.rs b/parquet/src/arrow/arrow_writer/byte_array.rs
index 9767ec98e6..2deb3c535a 100644
--- a/parquet/src/arrow/arrow_writer/byte_array.rs
+++ b/parquet/src/arrow/arrow_writer/byte_array.rs
@@ -548,11 +548,11 @@ where
 {
     if encoder.statistics_enabled != EnabledStatistics::None {
         if let Some((min, max)) = compute_min_max(values, indices.iter().cloned()) {
-            if encoder.min_value.as_ref().map_or(true, |m| m > &min) {
+            if encoder.min_value.as_ref().is_none_or(|m| m > &min) {
                 encoder.min_value = Some(min);
             }
 
-            if encoder.max_value.as_ref().map_or(true, |m| m < &max) {
+            if encoder.max_value.as_ref().is_none_or(|m| m < &max) {
                 encoder.max_value = Some(max);
             }
         }
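The `byte_array.rs` hunk above is the clearest illustration of the `is_none_or` migration: update a running minimum or maximum when no value has been seen yet, or when the candidate improves on it. A minimal sketch with a hypothetical `min_value: Option<i64>` standing in for the encoder field — illustrative only:

```rust
fn update_min(min_value: &mut Option<i64>, candidate: i64) {
    // Previously spelled `map_or(true, |m| *m > candidate)`; is_none_or
    // (stable since Rust 1.84) states the intent directly: update when
    // there is no current minimum, or the current one is larger.
    if min_value.as_ref().is_none_or(|m| *m > candidate) {
        *min_value = Some(candidate);
    }
}

fn main() {
    let mut min = None;
    for v in [3_i64, 1, 2] {
        update_min(&mut min, v);
    }
    assert_eq!(min, Some(1));
}
```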
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index 2b81693161..8f53cf2cba 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -353,10 +353,10 @@ impl LevelInfoBuilder {
                 let len = range.end - range.start;
 
                 let def_levels = info.def_levels.as_mut().unwrap();
-                def_levels.extend(std::iter::repeat(ctx.def_level - 1).take(len));
+                def_levels.extend(std::iter::repeat_n(ctx.def_level - 1, len));
 
                 if let Some(rep_levels) = info.rep_levels.as_mut() {
-                    rep_levels.extend(std::iter::repeat(ctx.rep_level).take(len));
+                    rep_levels.extend(std::iter::repeat_n(ctx.rep_level, len));
                 }
             })
     }
@@ -444,9 +444,9 @@ impl LevelInfoBuilder {
             let len = end_idx - start_idx;
             child.visit_leaves(|leaf| {
                 let rep_levels = leaf.rep_levels.as_mut().unwrap();
-                rep_levels.extend(std::iter::repeat(ctx.rep_level - 1).take(len));
+                rep_levels.extend(std::iter::repeat_n(ctx.rep_level - 1, len));
                 let def_levels = leaf.def_levels.as_mut().unwrap();
-                def_levels.extend(std::iter::repeat(ctx.def_level - 1).take(len));
+                def_levels.extend(std::iter::repeat_n(ctx.def_level - 1, len));
             })
         };
 
@@ -513,7 +513,7 @@ impl LevelInfoBuilder {
                     );
                 }
                 None => {
-                    let iter = std::iter::repeat(info.max_def_level).take(len);
+                    let iter = std::iter::repeat_n(info.max_def_level, len);
                     def_levels.extend(iter);
                     info.non_null_indices.extend(range);
                 }
@@ -523,7 +523,7 @@ impl LevelInfoBuilder {
         }
 
         if let Some(rep_levels) = &mut info.rep_levels {
-            rep_levels.extend(std::iter::repeat(info.max_rep_level).take(len))
+            rep_levels.extend(std::iter::repeat_n(info.max_rep_level, len))
         }
     }
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index 4782efda9c..e675be3190 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -2590,7 +2590,7 @@ mod tests {
     #[test]
     fn binary_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         // BinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
@@ -2600,7 +2600,7 @@ mod tests {
     #[test]
     fn binary_view_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         // BinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
@@ -2641,7 +2641,7 @@ mod tests {
     #[test]
     fn binary_column_bloom_filter() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         let array = Arc::new(BinaryArray::from_iter_values(many_vecs_iter));
@@ -2680,7 +2680,7 @@ mod tests {
     #[test]
     fn large_binary_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         // LargeBinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs
index 5051dce12b..cfa17db63d 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -321,7 +321,7 @@ mod tests {
     #[test]
     fn test_pad_nulls_empty() {
         let mut buffer = OffsetBuffer::<i32>::default();
-        let valid_mask = Buffer::from_iter(std::iter::repeat(false).take(9));
+        let valid_mask = Buffer::from_iter(std::iter::repeat_n(false, 9));
         buffer.pad_nulls(0, 0, 9, valid_mask.as_slice());
         let array = buffer.into_array(Some(valid_mask), ArrowType::Utf8);
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 0830797747..db7cd31468 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -1388,7 +1388,7 @@ fn update_stat<T: ParquetValueType, F>(
         return;
     }
 
-    if cur.as_ref().map_or(true, should_update) {
+    if cur.as_ref().is_none_or(should_update) {
         *cur = Some(val.clone());
     }
 }
diff --git a/parquet/src/encodings/rle.rs b/parquet/src/encodings/rle.rs
index 89a1f00a58..03700917ab 100644
--- a/parquet/src/encodings/rle.rs
+++ b/parquet/src/encodings/rle.rs
@@ -865,7 +865,7 @@ mod tests {
         let mut data: Vec<u8> = vec![
             (3 << 1) | 1, // bit-packed run of 3 * 8
         ];
-        data.extend(std::iter::repeat(0xFF).take(20));
+        data.extend(std::iter::repeat_n(0xFF, 20));
         let data: Bytes = data.into();
 
         let mut decoder = RleDecoder::new(8);
diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs
index 0320d1e474..5bb59b6b2f 100644
--- a/parquet/src/file/metadata/writer.rs
+++ b/parquet/src/file/metadata/writer.rs
@@ -393,7 +393,7 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
             self.metadata
                 .row_groups()
                 .iter()
-                .map(|rg| std::iter::repeat(None).take(rg.columns().len()).collect())
+                .map(|rg| std::iter::repeat_n(None, rg.columns().len()).collect())
                 .collect()
         }
     }
@@ -414,7 +414,7 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
             self.metadata
                 .row_groups()
                 .iter()
-                .map(|rg| std::iter::repeat(None).take(rg.columns().len()).collect())
+                .map(|rg| std::iter::repeat_n(None, rg.columns().len()).collect())
                 .collect()
         }
     }
diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs
index d16d2da9e0..2edb38deb3 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -41,7 +41,6 @@ use crate::thrift::TCompactSliceInputProtocol;
 use crate::thrift::TSerializable;
 use bytes::Bytes;
 use std::collections::VecDeque;
-use std::iter;
 use std::{fs::File, io::Read, path::Path, sync::Arc};
 use thrift::protocol::TCompactInputProtocol;
 
@@ -293,7 +292,7 @@ impl<'a, R: ChunkReader> SerializedRowGroupReader<'a, R> {
                 .map(|col| Sbbf::read_from_column_chunk(col, &*chunk_reader))
                 .collect::<Result<Vec<_>>>()?
         } else {
-            iter::repeat(None).take(metadata.columns().len()).collect()
+            std::iter::repeat_n(None, metadata.columns().len()).collect()
         };
         Ok(Self {
             chunk_reader,
diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index b3015c2ba7..f31f70b426 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -245,7 +245,7 @@ impl BitWriter {
     pub fn skip(&mut self, num_bytes: usize) -> usize {
         self.flush();
         let result = self.buffer.len();
-        self.buffer.extend(std::iter::repeat(0).take(num_bytes));
+        self.buffer.extend(std::iter::repeat_n(0, num_bytes));
         result
     }
diff --git a/parquet/tests/arrow_reader/mod.rs b/parquet/tests/arrow_reader/mod.rs
index 21aa1c3f26..739aa56662 100644
--- a/parquet/tests/arrow_reader/mod.rs
+++ b/parquet/tests/arrow_reader/mod.rs
@@ -505,7 +505,7 @@ fn make_bytearray_batch(
     large_binary_values: Vec<&[u8]>,
 ) -> RecordBatch {
     let num_rows = string_values.len();
-    let name: StringArray = std::iter::repeat(Some(name)).take(num_rows).collect();
+    let name: StringArray = std::iter::repeat_n(Some(name), num_rows).collect();
     let service_string: StringArray = string_values.iter().map(Some).collect();
     let service_binary: BinaryArray = binary_values.iter().map(Some).collect();
     let service_fixedsize: FixedSizeBinaryArray = fixedsize_values
@@ -552,7 +552,7 @@ fn make_bytearray_batch(
 /// name | service.name
 fn make_names_batch(name: &str, service_name_values: Vec<&str>) -> RecordBatch {
     let num_rows = service_name_values.len();
-    let name: StringArray = std::iter::repeat(Some(name)).take(num_rows).collect();
+    let name: StringArray = std::iter::repeat_n(Some(name), num_rows).collect();
     let service_name: StringArray = service_name_values.iter().map(Some).collect();
 
     let schema = Schema::new(vec![
diff --git a/parquet/tests/arrow_reader/statistics.rs b/parquet/tests/arrow_reader/statistics.rs
index 7a389fb5eb..9c230f79d8 100644
--- a/parquet/tests/arrow_reader/statistics.rs
+++ b/parquet/tests/arrow_reader/statistics.rs
@@ -82,7 +82,7 @@ impl Int64Case {
             Int64Array::from_iter(
                 v64.into_iter()
                     .map(Some)
-                    .chain(std::iter::repeat(None).take(self.null_values)),
+                    .chain(std::iter::repeat_n(None, self.null_values)),
             )
             .to_data(),
         )],