This is an automated email from the ASF dual-hosted git repository.
mneumann pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new d4be752ef5 fix: clippy warnings from nightly rust 1.82 (#6348)
d4be752ef5 is described below
commit d4be752ef54ee30198d0aa1abd3838188482e992
Author: Ruihang Xia <[email protected]>
AuthorDate: Tue Sep 3 17:43:19 2024 +0800
fix: clippy warnings from nightly rust 1.82 (#6348)
Signed-off-by: Ruihang Xia <[email protected]>
---
arrow-data/src/ffi.rs | 2 +-
arrow-ipc/src/writer.rs | 6 +-
arrow-ord/src/sort.rs | 2 +-
arrow-schema/src/fields.rs | 3 +-
parquet/src/arrow/arrow_reader/statistics.rs | 10 +-
parquet/src/arrow/arrow_writer/mod.rs | 8 +-
parquet/src/data_type.rs | 7 +-
parquet/src/file/metadata/writer.rs | 41 +++----
parquet/src/schema/types.rs | 169 +++++++++++++--------------
9 files changed, 114 insertions(+), 134 deletions(-)
diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs
index 589f7dac6d..3345595fac 100644
--- a/arrow-data/src/ffi.rs
+++ b/arrow-data/src/ffi.rs
@@ -324,6 +324,6 @@ mod tests {
assert_eq!(0, private_data.buffers_ptr.len());
- Box::into_raw(private_data);
+ let _ = Box::into_raw(private_data);
}
}
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index ade902f7ca..b09dcdc502 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -2572,7 +2572,7 @@ mod tests {
let mut fields = Vec::new();
let mut arrays = Vec::new();
for i in 0..num_cols {
- let field = Field::new(&format!("col_{}", i),
DataType::Decimal128(38, 10), true);
+ let field = Field::new(format!("col_{}", i),
DataType::Decimal128(38, 10), true);
let array = Decimal128Array::from(vec![num_cols as i128;
num_rows]);
fields.push(field);
arrays.push(Arc::new(array) as Arc<dyn Array>);
@@ -2627,7 +2627,7 @@ mod tests {
let mut fields = Vec::new();
let mut arrays = Vec::new();
for i in 0..num_cols {
- let field = Field::new(&format!("col_{}", i),
DataType::Decimal128(38, 10), true);
+ let field = Field::new(format!("col_{}", i),
DataType::Decimal128(38, 10), true);
let array = Decimal128Array::from(vec![num_cols as i128;
num_rows]);
fields.push(field);
arrays.push(Arc::new(array) as Arc<dyn Array>);
@@ -2682,7 +2682,7 @@ mod tests {
let mut fields = Vec::new();
let options = IpcWriteOptions::try_new(8, false,
MetadataVersion::V5).unwrap();
for i in 0..num_cols {
- let field = Field::new(&format!("col_{}", i),
DataType::Decimal128(38, 10), true);
+ let field = Field::new(format!("col_{}", i),
DataType::Decimal128(38, 10), true);
fields.push(field);
}
let schema = Schema::new(fields);
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index 140d878f39..168f82747c 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -403,7 +403,7 @@ fn sort_fixed_size_list(
}
#[inline(never)]
-fn sort_impl<T: ?Sized + Copy>(
+fn sort_impl<T: Copy>(
options: SortOptions,
valids: &mut [(u32, T)],
nulls: &[u32],
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index 63aef18ddf..5b9ce2a6da 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -389,14 +389,13 @@ impl UnionFields {
let mut set = 0_u128;
type_ids
.into_iter()
- .map(|idx| {
+ .inspect(|&idx| {
let mask = 1_u128 << idx;
if (set & mask) != 0 {
panic!("duplicate type id: {}", idx);
} else {
set |= mask;
}
- idx
})
.zip(fields)
.collect()
diff --git a/parquet/src/arrow/arrow_reader/statistics.rs
b/parquet/src/arrow/arrow_reader/statistics.rs
index 602a9ad5e5..4c0dac05c7 100644
--- a/parquet/src/arrow/arrow_reader/statistics.rs
+++ b/parquet/src/arrow/arrow_reader/statistics.rs
@@ -568,13 +568,9 @@ macro_rules! make_data_page_stats_iterator {
let next = self.iter.next();
match next {
Some((len, index)) => match index {
- $index_type(native_index) => Some(
- native_index
- .indexes
- .iter()
- .map(|x| $func(x))
- .collect::<Vec<_>>(),
- ),
+ $index_type(native_index) => {
+
Some(native_index.indexes.iter().map($func).collect::<Vec<_>>())
+ }
// No matching `Index` found;
// thus no statistics that can be extracted.
// We return vec![None; len] to effectively
diff --git a/parquet/src/arrow/arrow_writer/mod.rs
b/parquet/src/arrow/arrow_writer/mod.rs
index 5c5f28ac0f..3ec7a3dfea 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -3082,8 +3082,8 @@ mod tests {
let min = byte_array_stats.min_opt().unwrap();
let max = byte_array_stats.max_opt().unwrap();
- assert_eq!(min.as_bytes(), &[b'a']);
- assert_eq!(max.as_bytes(), &[b'd']);
+ assert_eq!(min.as_bytes(), b"a");
+ assert_eq!(max.as_bytes(), b"d");
} else {
panic!("expecting Statistics::ByteArray");
}
@@ -3154,8 +3154,8 @@ mod tests {
let min = byte_array_stats.min_opt().unwrap();
let max = byte_array_stats.max_opt().unwrap();
- assert_eq!(min.as_bytes(), &[b'a']);
- assert_eq!(max.as_bytes(), &[b'd']);
+ assert_eq!(min.as_bytes(), b"a");
+ assert_eq!(max.as_bytes(), b"d");
} else {
panic!("expecting Statistics::ByteArray");
}
diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 324e1c379b..a3bcfd1673 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -1298,11 +1298,8 @@ mod tests {
#[test]
fn test_byte_array_from() {
- assert_eq!(
- ByteArray::from(vec![b'A', b'B', b'C']).data(),
- &[b'A', b'B', b'C']
- );
- assert_eq!(ByteArray::from("ABC").data(), &[b'A', b'B', b'C']);
+ assert_eq!(ByteArray::from(b"ABC".to_vec()).data(), b"ABC");
+ assert_eq!(ByteArray::from("ABC").data(), b"ABC");
assert_eq!(
ByteArray::from(Bytes::from(vec![1u8, 2u8, 3u8, 4u8, 5u8])).data(),
&[1u8, 2u8, 3u8, 4u8, 5u8]
diff --git a/parquet/src/file/metadata/writer.rs
b/parquet/src/file/metadata/writer.rs
index dad960790e..92ce60556c 100644
--- a/parquet/src/file/metadata/writer.rs
+++ b/parquet/src/file/metadata/writer.rs
@@ -55,18 +55,14 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
// write offset index to the file
for (row_group_idx, row_group) in
self.row_groups.iter_mut().enumerate() {
for (column_idx, column_metadata) in
row_group.columns.iter_mut().enumerate() {
- match &offset_indexes[row_group_idx][column_idx] {
- Some(offset_index) => {
- let start_offset = self.buf.bytes_written();
- let mut protocol = TCompactOutputProtocol::new(&mut
self.buf);
- offset_index.write_to_out_protocol(&mut protocol)?;
- let end_offset = self.buf.bytes_written();
- // set offset and index for offset index
- column_metadata.offset_index_offset =
Some(start_offset as i64);
- column_metadata.offset_index_length =
- Some((end_offset - start_offset) as i32);
- }
- None => {}
+ if let Some(offset_index) =
&offset_indexes[row_group_idx][column_idx] {
+ let start_offset = self.buf.bytes_written();
+ let mut protocol = TCompactOutputProtocol::new(&mut
self.buf);
+ offset_index.write_to_out_protocol(&mut protocol)?;
+ let end_offset = self.buf.bytes_written();
+ // set offset and index for offset index
+ column_metadata.offset_index_offset = Some(start_offset as
i64);
+ column_metadata.offset_index_length = Some((end_offset -
start_offset) as i32);
}
}
}
@@ -84,18 +80,14 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
// write column index to the file
for (row_group_idx, row_group) in
self.row_groups.iter_mut().enumerate() {
for (column_idx, column_metadata) in
row_group.columns.iter_mut().enumerate() {
- match &column_indexes[row_group_idx][column_idx] {
- Some(column_index) => {
- let start_offset = self.buf.bytes_written();
- let mut protocol = TCompactOutputProtocol::new(&mut
self.buf);
- column_index.write_to_out_protocol(&mut protocol)?;
- let end_offset = self.buf.bytes_written();
- // set offset and index for offset index
- column_metadata.column_index_offset =
Some(start_offset as i64);
- column_metadata.column_index_length =
- Some((end_offset - start_offset) as i32);
- }
- None => {}
+ if let Some(column_index) =
&column_indexes[row_group_idx][column_idx] {
+ let start_offset = self.buf.bytes_written();
+ let mut protocol = TCompactOutputProtocol::new(&mut
self.buf);
+ column_index.write_to_out_protocol(&mut protocol)?;
+ let end_offset = self.buf.bytes_written();
+ // set offset and index for offset index
+ column_metadata.column_index_offset = Some(start_offset as
i64);
+ column_metadata.column_index_length = Some((end_offset -
start_offset) as i32);
}
}
}
@@ -524,7 +516,6 @@ mod tests {
async fn load_metadata_from_bytes(file_size: usize, data: Bytes) ->
ParquetMetaData {
use crate::arrow::async_reader::{MetadataFetch, MetadataLoader};
use crate::errors::Result as ParquetResult;
- use bytes::Bytes;
use futures::future::BoxFuture;
use futures::FutureExt;
use std::ops::Range;
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 190374fd88..2665f28fed 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -294,105 +294,102 @@ impl<'a> PrimitiveTypeBuilder<'a> {
));
}
- match &self.logical_type {
- Some(logical_type) => {
- // If a converted type is populated, check that it is
consistent with
- // its logical type
- if self.converted_type != ConvertedType::NONE {
- if ConvertedType::from(self.logical_type.clone()) !=
self.converted_type {
- return Err(general_err!(
- "Logical type {:?} is incompatible with converted
type {} for field '{}'",
- logical_type,
- self.converted_type,
- self.name
- ));
- }
- } else {
- // Populate the converted type for backwards compatibility
- basic_info.converted_type =
self.logical_type.clone().into();
+ if let Some(logical_type) = &self.logical_type {
+ // If a converted type is populated, check that it is consistent
with
+ // its logical type
+ if self.converted_type != ConvertedType::NONE {
+ if ConvertedType::from(self.logical_type.clone()) !=
self.converted_type {
+ return Err(general_err!(
+ "Logical type {:?} is incompatible with converted type
{} for field '{}'",
+ logical_type,
+ self.converted_type,
+ self.name
+ ));
+ }
+ } else {
+ // Populate the converted type for backwards compatibility
+ basic_info.converted_type = self.logical_type.clone().into();
+ }
+ // Check that logical type and physical type are compatible
+ match (logical_type, self.physical_type) {
+ (LogicalType::Map, _) | (LogicalType::List, _) => {
+ return Err(general_err!(
+ "{:?} cannot be applied to a primitive type for field
'{}'",
+ logical_type,
+ self.name
+ ));
}
- // Check that logical type and physical type are compatible
- match (logical_type, self.physical_type) {
- (LogicalType::Map, _) | (LogicalType::List, _) => {
+ (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Decimal { scale, precision }, _) => {
+ // Check that scale and precision are consistent with
legacy values
+ if *scale != self.scale {
return Err(general_err!(
- "{:?} cannot be applied to a primitive type for
field '{}'",
- logical_type,
+ "DECIMAL logical type scale {} must match
self.scale {} for field '{}'",
+ scale,
+ self.scale,
self.name
));
}
- (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::Decimal { scale, precision }, _) => {
- // Check that scale and precision are consistent with
legacy values
- if *scale != self.scale {
- return Err(general_err!(
- "DECIMAL logical type scale {} must match
self.scale {} for field '{}'",
- scale,
- self.scale,
- self.name
- ));
- }
- if *precision != self.precision {
- return Err(general_err!(
- "DECIMAL logical type precision {} must match
self.precision {} for field '{}'",
- precision,
- self.precision,
- self.name
- ));
- }
- self.check_decimal_precision_scale()?;
- }
- (LogicalType::Date, PhysicalType::INT32) => {}
- (
- LogicalType::Time {
- unit: TimeUnit::MILLIS(_),
- ..
- },
- PhysicalType::INT32,
- ) => {}
- (LogicalType::Time { unit, .. }, PhysicalType::INT64) => {
- if *unit == TimeUnit::MILLIS(Default::default()) {
- return Err(general_err!(
- "Cannot use millisecond unit on INT64 type for
field '{}'",
- self.name
- ));
- }
- }
- (LogicalType::Timestamp { .. }, PhysicalType::INT64) => {}
- (LogicalType::Integer { bit_width, .. },
PhysicalType::INT32)
- if *bit_width <= 32 => {}
- (LogicalType::Integer { bit_width, .. },
PhysicalType::INT64)
- if *bit_width == 64 => {}
- // Null type
- (LogicalType::Unknown, PhysicalType::INT32) => {}
- (LogicalType::String, PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {}
- (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if
self.length == 16 => {}
- (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) =>
{
- return Err(general_err!(
- "UUID cannot annotate field '{}' because it is not
a FIXED_LEN_BYTE_ARRAY(16) field",
- self.name
- ))
- }
- (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY)
- if self.length == 2 => {}
- (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY)
=> {
+ if *precision != self.precision {
return Err(general_err!(
- "FLOAT16 cannot annotate field '{}' because it is
not a FIXED_LEN_BYTE_ARRAY(2) field",
+ "DECIMAL logical type precision {} must match
self.precision {} for field '{}'",
+ precision,
+ self.precision,
self.name
- ))
+ ));
}
- (a, b) => {
+ self.check_decimal_precision_scale()?;
+ }
+ (LogicalType::Date, PhysicalType::INT32) => {}
+ (
+ LogicalType::Time {
+ unit: TimeUnit::MILLIS(_),
+ ..
+ },
+ PhysicalType::INT32,
+ ) => {}
+ (LogicalType::Time { unit, .. }, PhysicalType::INT64) => {
+ if *unit == TimeUnit::MILLIS(Default::default()) {
return Err(general_err!(
- "Cannot annotate {:?} from {} for field '{}'",
- a,
- b,
+ "Cannot use millisecond unit on INT64 type for
field '{}'",
self.name
- ))
+ ));
}
}
+ (LogicalType::Timestamp { .. }, PhysicalType::INT64) => {}
+ (LogicalType::Integer { bit_width, .. }, PhysicalType::INT32)
+ if *bit_width <= 32 => {}
+ (LogicalType::Integer { bit_width, .. }, PhysicalType::INT64)
+ if *bit_width == 64 => {}
+ // Null type
+ (LogicalType::Unknown, PhysicalType::INT32) => {}
+ (LogicalType::String, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {}
+ (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if
self.length == 16 => {}
+ (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
+ return Err(general_err!(
+ "UUID cannot annotate field '{}' because it is not a
FIXED_LEN_BYTE_ARRAY(16) field",
+ self.name
+ ))
+ }
+ (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY)
+ if self.length == 2 => {}
+ (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
+ return Err(general_err!(
+ "FLOAT16 cannot annotate field '{}' because it is not
a FIXED_LEN_BYTE_ARRAY(2) field",
+ self.name
+ ))
+ }
+ (a, b) => {
+ return Err(general_err!(
+ "Cannot annotate {:?} from {} for field '{}'",
+ a,
+ b,
+ self.name
+ ))
+ }
}
- None => {}
}
match self.converted_type {