This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2e52580b0f Introduce HashMap and HashSet type aliases (#13236)
2e52580b0f is described below

commit 2e52580b0f10c35b36d02c6f44e13a460de771c9
Author: David Rauschenbach <[email protected]>
AuthorDate: Mon Nov 4 23:08:12 2024 -0800

    Introduce HashMap and HashSet type aliases (#13236)
    
    * Unite all references to hashbrown::HashMap by using a common type definition
    
    * Replace some use of std::collections::HashMap with hashbrown::HashMap
    
    * Replace some use of std::collections::HashMap with hashbrown::HashMap
    
    * Replace some use of std::collections::HashMap with hashbrown::HashMap
    
    * Unite all references to hashbrown::HashSet by using a common type definition
    
    * Replace some use of std::collections::HashSet with hashbrown::HashSet
---
 datafusion/common/src/functional_dependencies.rs                   | 3 +--
 datafusion/common/src/lib.rs                                       | 5 +++++
 datafusion/core/src/bin/print_functions_docs.rs                    | 3 +--
 datafusion/core/src/catalog_common/listing_schema.rs               | 6 ++++--
 datafusion/core/src/datasource/file_format/parquet.rs              | 2 +-
 datafusion/core/src/datasource/listing/helpers.rs                  | 3 +--
 datafusion/core/src/physical_optimizer/sort_pushdown.rs            | 4 +---
 datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs                 | 2 +-
 datafusion/core/tests/fuzz_cases/window_fuzz.rs                    | 2 +-
 .../core/tests/user_defined/user_defined_scalar_functions.rs       | 4 ++--
 datafusion/execution/src/memory_pool/pool.rs                       | 2 +-
 datafusion/expr/src/conditional_expressions.rs                     | 3 +--
 datafusion/expr/src/execution_props.rs                             | 2 +-
 datafusion/expr/src/expr.rs                                        | 4 ++--
 datafusion/expr/src/registry.rs                                    | 4 ++--
 datafusion/expr/src/utils.rs                                       | 4 ++--
 datafusion/functions-aggregate/src/median.rs                       | 3 +--
 datafusion/functions-aggregate/src/regr.rs                         | 7 ++++---
 datafusion/functions-nested/src/except.rs                          | 3 +--
 datafusion/functions-nested/src/map.rs                             | 4 ++--
 datafusion/functions/src/core/named_struct.rs                      | 3 +--
 datafusion/functions/src/unicode/translate.rs                      | 2 +-
 datafusion/optimizer/src/decorrelate.rs                            | 4 ++--
 datafusion/optimizer/src/optimize_projections/mod.rs               | 4 ++--
 datafusion/optimizer/src/optimizer.rs                              | 3 +--
 datafusion/optimizer/src/single_distinct_to_groupby.rs             | 6 +++---
 datafusion/physical-expr-common/src/binary_view_map.rs             | 2 +-
 datafusion/physical-expr/src/expressions/in_list.rs                | 2 +-
 datafusion/physical-expr/src/utils/guarantee.rs                    | 4 ++--
 datafusion/physical-expr/src/utils/mod.rs                          | 4 +---
 datafusion/physical-plan/src/joins/sort_merge_join.rs              | 5 ++---
 datafusion/physical-plan/src/joins/stream_join_utils.rs            | 3 +--
 datafusion/physical-plan/src/joins/symmetric_hash_join.rs          | 3 +--
 datafusion/physical-plan/src/metrics/mod.rs                        | 2 +-
 datafusion/physical-plan/src/repartition/mod.rs                    | 2 +-
 datafusion/physical-plan/src/topk/mod.rs                           | 2 +-
 datafusion/physical-plan/src/unnest.rs                             | 4 +---
 datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs    | 6 ++++--
 datafusion/sql/src/unparser/rewrite.rs                             | 7 ++-----
 datafusion/sql/src/utils.rs                                        | 5 ++---
 datafusion/substrait/src/extensions.rs                             | 3 +--
 41 files changed, 67 insertions(+), 79 deletions(-)

diff --git a/datafusion/common/src/functional_dependencies.rs 
b/datafusion/common/src/functional_dependencies.rs
index 31eafc7443..984d8ca267 100644
--- a/datafusion/common/src/functional_dependencies.rs
+++ b/datafusion/common/src/functional_dependencies.rs
@@ -18,13 +18,12 @@
 //! FunctionalDependencies keeps track of functional dependencies
 //! inside DFSchema.
 
-use std::collections::HashSet;
 use std::fmt::{Display, Formatter};
 use std::ops::Deref;
 use std::vec::IntoIter;
 
 use crate::utils::{merge_and_order_indices, set_difference};
-use crate::{DFSchema, JoinType};
+use crate::{DFSchema, HashSet, JoinType};
 
 /// This object defines a constraint on a table.
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index 08431a36e8..618e88fb83 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -66,6 +66,7 @@ pub use functional_dependencies::{
     get_target_functional_dependencies, Constraint, Constraints, Dependency,
     FunctionalDependence, FunctionalDependencies,
 };
+use hashbrown::hash_map::DefaultHashBuilder;
 pub use join_type::{JoinConstraint, JoinSide, JoinType};
 pub use param_value::ParamValues;
 pub use scalar::{ScalarType, ScalarValue};
@@ -87,6 +88,10 @@ pub use error::{
     _substrait_datafusion_err,
 };
 
+// The HashMap and HashSet implementations that should be used as the uniform defaults
+pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;
+pub type HashSet<T, S = DefaultHashBuilder> = hashbrown::HashSet<T, S>;
+
 /// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is
 /// not possible. In normal usage of DataFusion the downcast should always succeed.
 ///
diff --git a/datafusion/core/src/bin/print_functions_docs.rs 
b/datafusion/core/src/bin/print_functions_docs.rs
index 3aedcbc2aa..7f3990c534 100644
--- a/datafusion/core/src/bin/print_functions_docs.rs
+++ b/datafusion/core/src/bin/print_functions_docs.rs
@@ -16,12 +16,11 @@
 // under the License.
 
 use datafusion::execution::SessionStateDefaults;
-use datafusion_common::{not_impl_err, Result};
+use datafusion_common::{not_impl_err, HashSet, Result};
 use datafusion_expr::{
     aggregate_doc_sections, scalar_doc_sections, window_doc_sections, 
AggregateUDF,
     DocSection, Documentation, ScalarUDF, WindowUDF,
 };
-use hashbrown::HashSet;
 use itertools::Itertools;
 use std::env::args;
 use std::fmt::Write as _;
diff --git a/datafusion/core/src/catalog_common/listing_schema.rs 
b/datafusion/core/src/catalog_common/listing_schema.rs
index 665ea58c5f..67952770f4 100644
--- a/datafusion/core/src/catalog_common/listing_schema.rs
+++ b/datafusion/core/src/catalog_common/listing_schema.rs
@@ -18,14 +18,16 @@
 //! [`ListingSchemaProvider`]: [`SchemaProvider`] that scans ObjectStores for 
tables automatically
 
 use std::any::Any;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::path::Path;
 use std::sync::{Arc, Mutex};
 
 use crate::catalog::{SchemaProvider, TableProvider, TableProviderFactory};
 use crate::execution::context::SessionState;
 
-use datafusion_common::{Constraints, DFSchema, DataFusionError, 
TableReference};
+use datafusion_common::{
+    Constraints, DFSchema, DataFusionError, HashMap, TableReference,
+};
 use datafusion_expr::CreateExternalTable;
 
 use async_trait::async_trait;
diff --git a/datafusion/core/src/datasource/file_format/parquet.rs 
b/datafusion/core/src/datasource/file_format/parquet.rs
index b3f54e0773..e27a13b6e7 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -63,7 +63,7 @@ use datafusion_physical_plan::metrics::MetricsSet;
 
 use async_trait::async_trait;
 use bytes::Bytes;
-use hashbrown::HashMap;
+use datafusion_common::HashMap;
 use log::debug;
 use object_store::buffered::BufWriter;
 use parquet::arrow::arrow_writer::{
diff --git a/datafusion/core/src/datasource/listing/helpers.rs 
b/datafusion/core/src/datasource/listing/helpers.rs
index 47012f777a..1b3588d9a2 100644
--- a/datafusion/core/src/datasource/listing/helpers.rs
+++ b/datafusion/core/src/datasource/listing/helpers.rs
@@ -17,7 +17,6 @@
 
 //! Helper functions for the table implementation
 
-use std::collections::HashMap;
 use std::mem;
 use std::sync::Arc;
 
@@ -25,7 +24,7 @@ use super::ListingTableUrl;
 use super::PartitionedFile;
 use crate::execution::context::SessionState;
 use datafusion_common::internal_err;
-use datafusion_common::{Result, ScalarValue};
+use datafusion_common::{HashMap, Result, ScalarValue};
 use datafusion_expr::{BinaryExpr, Operator};
 
 use arrow::{
diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs 
b/datafusion/core/src/physical_optimizer/sort_pushdown.rs
index 9eb200f534..1a53077b1f 100644
--- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs
+++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs
@@ -32,7 +32,7 @@ use crate::physical_plan::{ExecutionPlan, 
ExecutionPlanProperties};
 use datafusion_common::tree_node::{
     ConcreteTreeNode, Transformed, TreeNode, TreeNodeRecursion,
 };
-use datafusion_common::{plan_err, JoinSide, Result};
+use datafusion_common::{plan_err, HashSet, JoinSide, Result};
 use datafusion_expr::JoinType;
 use datafusion_physical_expr::expressions::Column;
 use datafusion_physical_expr::utils::collect_columns;
@@ -41,8 +41,6 @@ use datafusion_physical_expr_common::sort_expr::{
     LexOrdering, LexOrderingRef, LexRequirement,
 };
 
-use hashbrown::HashSet;
-
 /// This is a "data class" we use within the [`EnforceSorting`] rule to push
 /// down [`SortExec`] in the plan. In some cases, we can reduce the total
 /// computational cost by pushing down `SortExec`s through some executors. The
diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs 
b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
index 21f604e6c6..4cb2b1bfbc 100644
--- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
@@ -42,8 +42,8 @@ use test_utils::{add_empty_batches, StringBatchGenerator};
 use crate::fuzz_cases::aggregation_fuzzer::{
     AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, 
QueryBuilder,
 };
+use datafusion_common::HashMap;
 use datafusion_physical_expr_common::sort_expr::LexOrdering;
-use hashbrown::HashMap;
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use tokio::task::JoinSet;
diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs 
b/datafusion/core/tests/fuzz_cases/window_fuzz.rs
index 5bfb4d97ed..e883207f7b 100644
--- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs
@@ -45,10 +45,10 @@ use datafusion_physical_expr::{PhysicalExpr, 
PhysicalSortExpr};
 use test_utils::add_empty_batches;
 
 use datafusion::functions_window::row_number::row_number_udwf;
+use datafusion_common::HashMap;
 use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf};
 use datafusion_functions_window::rank::{dense_rank_udwf, rank_udwf};
 use datafusion_physical_expr_common::sort_expr::LexOrdering;
-use hashbrown::HashMap;
 use rand::distributions::Alphanumeric;
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
diff --git 
a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs 
b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index f1b1728623..8453a360cd 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 use std::any::Any;
-use std::collections::HashMap;
 use std::hash::{DefaultHasher, Hash, Hasher};
 use std::sync::Arc;
 
@@ -39,7 +38,8 @@ use datafusion_common::cast::{as_float64_array, 
as_int32_array};
 use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_common::{
     assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, 
internal_err,
-    not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, Result, 
ScalarValue,
+    not_impl_err, plan_err, DFSchema, DataFusionError, ExprSchema, HashMap, 
Result,
+    ScalarValue,
 };
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{
diff --git a/datafusion/execution/src/memory_pool/pool.rs 
b/datafusion/execution/src/memory_pool/pool.rs
index e169c1f319..c2ec42d0df 100644
--- a/datafusion/execution/src/memory_pool/pool.rs
+++ b/datafusion/execution/src/memory_pool/pool.rs
@@ -16,8 +16,8 @@
 // under the License.
 
 use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation};
+use datafusion_common::HashMap;
 use datafusion_common::{resources_datafusion_err, DataFusionError, Result};
-use hashbrown::HashMap;
 use log::debug;
 use parking_lot::Mutex;
 use std::{
diff --git a/datafusion/expr/src/conditional_expressions.rs 
b/datafusion/expr/src/conditional_expressions.rs
index 23cc88f1c0..9cb51612d0 100644
--- a/datafusion/expr/src/conditional_expressions.rs
+++ b/datafusion/expr/src/conditional_expressions.rs
@@ -19,8 +19,7 @@
 use crate::expr::Case;
 use crate::{expr_schema::ExprSchemable, Expr};
 use arrow::datatypes::DataType;
-use datafusion_common::{plan_err, DFSchema, Result};
-use std::collections::HashSet;
+use datafusion_common::{plan_err, DFSchema, HashSet, Result};
 
 /// Helper struct for building [Expr::Case]
 pub struct CaseBuilder {
diff --git a/datafusion/expr/src/execution_props.rs 
b/datafusion/expr/src/execution_props.rs
index 3401a94b27..d672bd1acc 100644
--- a/datafusion/expr/src/execution_props.rs
+++ b/datafusion/expr/src/execution_props.rs
@@ -18,7 +18,7 @@
 use crate::var_provider::{VarProvider, VarType};
 use chrono::{DateTime, TimeZone, Utc};
 use datafusion_common::alias::AliasGenerator;
-use std::collections::HashMap;
+use datafusion_common::HashMap;
 use std::sync::Arc;
 
 /// Holds per-query execution properties and data (such as statement
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index a9c183952f..d3a3852a1e 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -17,7 +17,7 @@
 
 //! Logical Expressions: [`Expr`]
 
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::fmt::{self, Display, Formatter, Write};
 use std::hash::{Hash, Hasher};
 use std::mem;
@@ -39,7 +39,7 @@ use datafusion_common::tree_node::{
     Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
 };
 use datafusion_common::{
-    plan_err, Column, DFSchema, Result, ScalarValue, TableReference,
+    plan_err, Column, DFSchema, HashMap, Result, ScalarValue, TableReference,
 };
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 use sqlparser::ast::{
diff --git a/datafusion/expr/src/registry.rs b/datafusion/expr/src/registry.rs
index 6d3457f70d..4eb49710bc 100644
--- a/datafusion/expr/src/registry.rs
+++ b/datafusion/expr/src/registry.rs
@@ -20,8 +20,8 @@
 use crate::expr_rewriter::FunctionRewrite;
 use crate::planner::ExprPlanner;
 use crate::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
-use datafusion_common::{not_impl_err, plan_datafusion_err, Result};
-use std::collections::{HashMap, HashSet};
+use datafusion_common::{not_impl_err, plan_datafusion_err, HashMap, Result};
+use std::collections::HashSet;
 use std::fmt::Debug;
 use std::sync::Arc;
 
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 29c62440ab..c22ee244fe 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -18,7 +18,7 @@
 //! Expression utilities
 
 use std::cmp::Ordering;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::ops::Deref;
 use std::sync::Arc;
 
@@ -36,7 +36,7 @@ use datafusion_common::tree_node::{
 use datafusion_common::utils::get_at_indices;
 use datafusion_common::{
     internal_err, plan_datafusion_err, plan_err, Column, DFSchema, DFSchemaRef,
-    DataFusionError, Result, TableReference,
+    DataFusionError, HashMap, Result, TableReference,
 };
 
 use indexmap::IndexSet;
diff --git a/datafusion/functions-aggregate/src/median.rs 
b/datafusion/functions-aggregate/src/median.rs
index ff0a930d49..a7114bb68b 100644
--- a/datafusion/functions-aggregate/src/median.rs
+++ b/datafusion/functions-aggregate/src/median.rs
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::collections::HashSet;
 use std::fmt::{Debug, Formatter};
 use std::mem::{size_of, size_of_val};
 use std::sync::{Arc, OnceLock};
@@ -33,7 +32,7 @@ use arrow::array::Array;
 use arrow::array::ArrowNativeTypeOp;
 use arrow::datatypes::ArrowNativeType;
 
-use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_common::{DataFusionError, HashSet, Result, ScalarValue};
 use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::{
diff --git a/datafusion/functions-aggregate/src/regr.rs 
b/datafusion/functions-aggregate/src/regr.rs
index bf1e81949d..9dd13634ff 100644
--- a/datafusion/functions-aggregate/src/regr.rs
+++ b/datafusion/functions-aggregate/src/regr.rs
@@ -24,8 +24,10 @@ use arrow::{
     datatypes::DataType,
     datatypes::Field,
 };
-use datafusion_common::{downcast_value, plan_err, unwrap_or_internal_err, 
ScalarValue};
-use datafusion_common::{DataFusionError, Result};
+use datafusion_common::{
+    downcast_value, plan_err, unwrap_or_internal_err, DataFusionError, 
HashMap, Result,
+    ScalarValue,
+};
 use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::NUMERICS;
@@ -34,7 +36,6 @@ use datafusion_expr::{
     Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
 };
 use std::any::Any;
-use std::collections::HashMap;
 use std::fmt::Debug;
 use std::mem::size_of_val;
 use std::sync::OnceLock;
diff --git a/datafusion/functions-nested/src/except.rs 
b/datafusion/functions-nested/src/except.rs
index 947d3c0182..100fb587d6 100644
--- a/datafusion/functions-nested/src/except.rs
+++ b/datafusion/functions-nested/src/except.rs
@@ -23,13 +23,12 @@ use arrow_array::cast::AsArray;
 use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
 use arrow_buffer::OffsetBuffer;
 use arrow_schema::{DataType, FieldRef};
-use datafusion_common::{exec_err, internal_err, Result};
+use datafusion_common::{exec_err, internal_err, HashSet, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
 use datafusion_expr::{
     ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
 };
 use std::any::Any;
-use std::collections::HashSet;
 use std::sync::{Arc, OnceLock};
 
 make_udf_expr_and_func!(
diff --git a/datafusion/functions-nested/src/map.rs 
b/datafusion/functions-nested/src/map.rs
index d7dce3bacb..cad193910c 100644
--- a/datafusion/functions-nested/src/map.rs
+++ b/datafusion/functions-nested/src/map.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use std::any::Any;
-use std::collections::{HashSet, VecDeque};
+use std::collections::VecDeque;
 use std::sync::{Arc, OnceLock};
 
 use arrow::array::ArrayData;
@@ -25,7 +25,7 @@ use arrow_buffer::{Buffer, ToByteSlice};
 use arrow_schema::{DataType, Field, SchemaBuilder};
 
 use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
-use datafusion_common::{exec_err, Result, ScalarValue};
+use datafusion_common::{exec_err, HashSet, Result, ScalarValue};
 use datafusion_expr::expr::ScalarFunction;
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP;
 use datafusion_expr::{
diff --git a/datafusion/functions/src/core/named_struct.rs 
b/datafusion/functions/src/core/named_struct.rs
index b2c7f06d58..d53dd2277f 100644
--- a/datafusion/functions/src/core/named_struct.rs
+++ b/datafusion/functions/src/core/named_struct.rs
@@ -17,11 +17,10 @@
 
 use arrow::array::StructArray;
 use arrow::datatypes::{DataType, Field, Fields};
-use datafusion_common::{exec_err, internal_err, Result, ScalarValue};
+use datafusion_common::{exec_err, internal_err, HashSet, Result, ScalarValue};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRUCT;
 use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
-use hashbrown::HashSet;
 use std::any::Any;
 use std::sync::{Arc, OnceLock};
 
diff --git a/datafusion/functions/src/unicode/translate.rs 
b/datafusion/functions/src/unicode/translate.rs
index fa626b396b..845d34c708 100644
--- a/datafusion/functions/src/unicode/translate.rs
+++ b/datafusion/functions/src/unicode/translate.rs
@@ -22,7 +22,7 @@ use arrow::array::{
     ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray, 
OffsetSizeTrait,
 };
 use arrow::datatypes::DataType;
-use hashbrown::HashMap;
+use datafusion_common::HashMap;
 use unicode_segmentation::UnicodeSegmentation;
 
 use crate::utils::{make_scalar_function, utf8_to_str_type};
diff --git a/datafusion/optimizer/src/decorrelate.rs 
b/datafusion/optimizer/src/decorrelate.rs
index 6aa59b77f7..b5726d9991 100644
--- a/datafusion/optimizer/src/decorrelate.rs
+++ b/datafusion/optimizer/src/decorrelate.rs
@@ -17,7 +17,7 @@
 
 //! [`PullUpCorrelatedExpr`] converts correlated subqueries to `Joins`
 
-use std::collections::{BTreeSet, HashMap};
+use std::collections::BTreeSet;
 use std::ops::Deref;
 use std::sync::Arc;
 
@@ -27,7 +27,7 @@ use crate::utils::collect_subquery_cols;
 use datafusion_common::tree_node::{
     Transformed, TransformedResult, TreeNode, TreeNodeRecursion, 
TreeNodeRewriter,
 };
-use datafusion_common::{plan_err, Column, DFSchemaRef, Result, ScalarValue};
+use datafusion_common::{plan_err, Column, DFSchemaRef, HashMap, Result, 
ScalarValue};
 use datafusion_expr::expr::Alias;
 use datafusion_expr::simplify::SimplifyContext;
 use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction};
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs 
b/datafusion/optimizer/src/optimize_projections/mod.rs
index ec2225bbc0..67d888abda 100644
--- a/datafusion/optimizer/src/optimize_projections/mod.rs
+++ b/datafusion/optimizer/src/optimize_projections/mod.rs
@@ -19,7 +19,7 @@
 
 mod required_indices;
 
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::sync::Arc;
 
 use crate::optimizer::ApplyOrder;
@@ -27,7 +27,7 @@ use crate::{OptimizerConfig, OptimizerRule};
 
 use datafusion_common::{
     get_required_group_by_exprs_indices, internal_datafusion_err, 
internal_err, Column,
-    JoinType, Result,
+    HashMap, JoinType, Result,
 };
 use datafusion_expr::expr::Alias;
 use datafusion_expr::Unnest;
diff --git a/datafusion/optimizer/src/optimizer.rs 
b/datafusion/optimizer/src/optimizer.rs
index 90a790a0e8..975150cd61 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -17,7 +17,6 @@
 
 //! [`Optimizer`] and [`OptimizerRule`]
 
-use std::collections::HashSet;
 use std::fmt::Debug;
 use std::sync::Arc;
 
@@ -29,7 +28,7 @@ use datafusion_common::alias::AliasGenerator;
 use datafusion_common::config::ConfigOptions;
 use datafusion_common::instant::Instant;
 use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
-use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
+use datafusion_common::{internal_err, DFSchema, DataFusionError, HashSet, 
Result};
 use datafusion_expr::logical_plan::LogicalPlan;
 
 use crate::common_subexpr_eliminate::CommonSubexprEliminate;
diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs 
b/datafusion/optimizer/src/single_distinct_to_groupby.rs
index 01875349c9..c8f3a4bc78 100644
--- a/datafusion/optimizer/src/single_distinct_to_groupby.rs
+++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs
@@ -22,7 +22,9 @@ use std::sync::Arc;
 use crate::optimizer::ApplyOrder;
 use crate::{OptimizerConfig, OptimizerRule};
 
-use datafusion_common::{internal_err, tree_node::Transformed, DataFusionError, 
Result};
+use datafusion_common::{
+    internal_err, tree_node::Transformed, DataFusionError, HashSet, Result,
+};
 use datafusion_expr::builder::project;
 use datafusion_expr::{
     col,
@@ -31,8 +33,6 @@ use datafusion_expr::{
     Expr,
 };
 
-use hashbrown::HashSet;
-
 /// single distinct to group by optimizer rule
 ///  ```text
 ///    Before:
diff --git a/datafusion/physical-expr-common/src/binary_view_map.rs 
b/datafusion/physical-expr-common/src/binary_view_map.rs
index c6768a19d3..e131ad8f50 100644
--- a/datafusion/physical-expr-common/src/binary_view_map.rs
+++ b/datafusion/physical-expr-common/src/binary_view_map.rs
@@ -393,7 +393,7 @@ where
 #[cfg(test)]
 mod tests {
     use arrow::array::{BinaryViewArray, GenericByteViewArray, StringViewArray};
-    use hashbrown::HashMap;
+    use datafusion_common::HashMap;
 
     use super::*;
 
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs 
b/datafusion/physical-expr/src/expressions/in_list.rs
index cf57ce3e0e..1a3cd7600b 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -44,8 +44,8 @@ use datafusion_expr::ColumnarValue;
 use datafusion_physical_expr_common::datum::compare_with_eq;
 
 use ahash::RandomState;
+use datafusion_common::HashMap;
 use hashbrown::hash_map::RawEntryMut;
-use hashbrown::HashMap;
 
 /// InList
 pub struct InListExpr {
diff --git a/datafusion/physical-expr/src/utils/guarantee.rs 
b/datafusion/physical-expr/src/utils/guarantee.rs
index fbb59cc92f..2c37c4d8b3 100644
--- a/datafusion/physical-expr/src/utils/guarantee.rs
+++ b/datafusion/physical-expr/src/utils/guarantee.rs
@@ -20,9 +20,9 @@
 
 use crate::utils::split_disjunction;
 use crate::{split_conjunction, PhysicalExpr};
-use datafusion_common::{Column, ScalarValue};
+use datafusion_common::{Column, HashMap, ScalarValue};
 use datafusion_expr::Operator;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::fmt::{self, Display, Formatter};
 use std::sync::Arc;
 
diff --git a/datafusion/physical-expr/src/utils/mod.rs 
b/datafusion/physical-expr/src/utils/mod.rs
index c3d1b1425b..73d744b4b6 100644
--- a/datafusion/physical-expr/src/utils/mod.rs
+++ b/datafusion/physical-expr/src/utils/mod.rs
@@ -17,10 +17,8 @@
 
 mod guarantee;
 pub use guarantee::{Guarantee, LiteralGuarantee};
-use hashbrown::HashSet;
 
 use std::borrow::Borrow;
-use std::collections::HashMap;
 use std::sync::Arc;
 
 use crate::expressions::{BinaryExpr, Column};
@@ -32,7 +30,7 @@ use arrow::datatypes::SchemaRef;
 use datafusion_common::tree_node::{
     Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
 };
-use datafusion_common::Result;
+use datafusion_common::{HashMap, HashSet, Result};
 use datafusion_expr::Operator;
 
 use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef};
diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs 
b/datafusion/physical-plan/src/joins/sort_merge_join.rs
index 3ad892c880..2f6dc5fa0b 100644
--- a/datafusion/physical-plan/src/joins/sort_merge_join.rs
+++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs
@@ -43,8 +43,8 @@ use arrow::error::ArrowError;
 use arrow::ipc::reader::FileReader;
 use arrow_array::types::UInt64Type;
 use datafusion_common::{
-    exec_err, internal_err, not_impl_err, plan_err, DataFusionError, JoinSide, 
JoinType,
-    Result,
+    exec_err, internal_err, not_impl_err, plan_err, DataFusionError, HashSet, 
JoinSide,
+    JoinType, Result,
 };
 use datafusion_execution::disk_manager::RefCountedTempFile;
 use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
@@ -54,7 +54,6 @@ use 
datafusion_physical_expr::equivalence::join_equivalence_properties;
 use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement};
 use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement};
 use futures::{Stream, StreamExt};
-use hashbrown::HashSet;
 
 use crate::expressions::PhysicalSortExpr;
 use crate::joins::utils::{
diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs 
b/datafusion/physical-plan/src/joins/stream_join_utils.rs
index 5ccdd9b40d..f08ce0ea2f 100644
--- a/datafusion/physical-plan/src/joins/stream_join_utils.rs
+++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs
@@ -32,7 +32,7 @@ use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder};
 use arrow_schema::{Schema, SchemaRef};
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_common::{
-    arrow_datafusion_err, DataFusionError, JoinSide, Result, ScalarValue,
+    arrow_datafusion_err, DataFusionError, HashSet, JoinSide, Result, 
ScalarValue,
 };
 use datafusion_expr::interval_arithmetic::Interval;
 use datafusion_physical_expr::expressions::Column;
@@ -42,7 +42,6 @@ use datafusion_physical_expr::{PhysicalExpr, 
PhysicalSortExpr};
 
 use datafusion_physical_expr_common::sort_expr::LexOrderingRef;
 use hashbrown::raw::RawTable;
-use hashbrown::HashSet;
 
 /// Implementation of `JoinHashMapType` for `PruningJoinHashMap`.
 impl JoinHashMapType for PruningJoinHashMap {
diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs 
b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
index 5b6dc2cd2a..f082bdbdd3 100644
--- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
+++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
@@ -64,7 +64,7 @@ use arrow::record_batch::RecordBatch;
 use arrow_buffer::ArrowNativeType;
 use datafusion_common::hash_utils::create_hashes;
 use datafusion_common::utils::bisect;
-use datafusion_common::{internal_err, plan_err, JoinSide, JoinType, Result};
+use datafusion_common::{internal_err, plan_err, HashSet, JoinSide, JoinType, 
Result};
 use datafusion_execution::memory_pool::MemoryConsumer;
 use datafusion_execution::TaskContext;
 use datafusion_expr::interval_arithmetic::Interval;
@@ -77,7 +77,6 @@ use datafusion_physical_expr_common::sort_expr::{
     LexOrdering, LexOrderingRef, LexRequirement,
 };
 use futures::{ready, Stream, StreamExt};
-use hashbrown::HashSet;
 use parking_lot::Mutex;
 
 const HASHMAP_SHRINK_SCALE_FACTOR: usize = 4;
diff --git a/datafusion/physical-plan/src/metrics/mod.rs 
b/datafusion/physical-plan/src/metrics/mod.rs
index ead0ca3369..4712729bda 100644
--- a/datafusion/physical-plan/src/metrics/mod.rs
+++ b/datafusion/physical-plan/src/metrics/mod.rs
@@ -28,7 +28,7 @@ use std::{
     sync::Arc,
 };
 
-use hashbrown::HashMap;
+use datafusion_common::HashMap;
 
 // public exports
 pub use baseline::{BaselineMetrics, RecordOutput};
diff --git a/datafusion/physical-plan/src/repartition/mod.rs 
b/datafusion/physical-plan/src/repartition/mod.rs
index bc65b25156..4d0dbc75d4 100644
--- a/datafusion/physical-plan/src/repartition/mod.rs
+++ b/datafusion/physical-plan/src/repartition/mod.rs
@@ -50,10 +50,10 @@ use datafusion_execution::TaskContext;
 use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr};
 
 use crate::execution_plan::CardinalityEffect;
+use datafusion_common::HashMap;
 use datafusion_physical_expr_common::sort_expr::{LexOrdering, 
PhysicalSortExpr};
 use futures::stream::Stream;
 use futures::{FutureExt, StreamExt, TryStreamExt};
-use hashbrown::HashMap;
 use log::trace;
 use parking_lot::Mutex;
 
diff --git a/datafusion/physical-plan/src/topk/mod.rs 
b/datafusion/physical-plan/src/topk/mod.rs
index 14469ab6c0..27bb3b2b36 100644
--- a/datafusion/physical-plan/src/topk/mod.rs
+++ b/datafusion/physical-plan/src/topk/mod.rs
@@ -27,6 +27,7 @@ use std::{cmp::Ordering, collections::BinaryHeap, sync::Arc};
 use crate::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream};
 use arrow_array::{Array, ArrayRef, RecordBatch};
 use arrow_schema::SchemaRef;
+use datafusion_common::HashMap;
 use datafusion_common::Result;
 use datafusion_execution::{
     memory_pool::{MemoryConsumer, MemoryReservation},
@@ -34,7 +35,6 @@ use datafusion_execution::{
 };
 use datafusion_physical_expr::PhysicalSortExpr;
 use datafusion_physical_expr_common::sort_expr::LexOrdering;
-use hashbrown::HashMap;
 
 use super::metrics::{BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder};
 
diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs
index b7b9f17eb1..06288a1f70 100644
--- a/datafusion/physical-plan/src/unnest.rs
+++ b/datafusion/physical-plan/src/unnest.rs
@@ -18,7 +18,6 @@
 //! Define a plan for unnesting values in columns that contain a list type.
 
 use std::cmp::{self, Ordering};
-use std::collections::HashMap;
 use std::{any::Any, sync::Arc};
 
 use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
@@ -40,14 +39,13 @@ use arrow::record_batch::RecordBatch;
 use arrow_array::{Int64Array, Scalar, StructArray};
 use arrow_ord::cmp::lt;
 use datafusion_common::{
-    exec_datafusion_err, exec_err, internal_err, Result, UnnestOptions,
+    exec_datafusion_err, exec_err, internal_err, HashMap, HashSet, Result, UnnestOptions,
 };
 use datafusion_execution::TaskContext;
 use datafusion_physical_expr::EquivalenceProperties;
 
 use async_trait::async_trait;
 use futures::{Stream, StreamExt};
-use hashbrown::HashSet;
 use log::trace;
 
 /// Unnest the given columns (either with type struct or list)
diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
index 8c0331f945..c3e0a4e389 100644
--- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
+++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
@@ -22,7 +22,7 @@
 
 use std::any::Any;
 use std::cmp::{min, Ordering};
-use std::collections::{HashMap, VecDeque};
+use std::collections::VecDeque;
 use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};
@@ -51,7 +51,9 @@ use datafusion_common::stats::Precision;
 use datafusion_common::utils::{
     evaluate_partition_ranges, get_at_indices, get_row_at_idx,
 };
-use datafusion_common::{arrow_datafusion_err, exec_err, DataFusionError, Result};
+use datafusion_common::{
+    arrow_datafusion_err, exec_err, DataFusionError, HashMap, Result,
+};
 use datafusion_execution::TaskContext;
 use datafusion_expr::window_state::{PartitionBatchState, WindowAggState};
 use datafusion_expr::ColumnarValue;
diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs
index 57d700f869..6b3b999ba0 100644
--- a/datafusion/sql/src/unparser/rewrite.rs
+++ b/datafusion/sql/src/unparser/rewrite.rs
@@ -15,15 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{
-    collections::{HashMap, HashSet},
-    sync::Arc,
-};
+use std::{collections::HashSet, sync::Arc};
 
 use arrow_schema::Schema;
 use datafusion_common::{
     tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
-    Column, Result, TableReference,
+    Column, HashMap, Result, TableReference,
 };
 use datafusion_expr::{expr::Alias, tree_node::transform_sort_vec};
 use datafusion_expr::{Expr, LogicalPlan, Projection, Sort, SortExpr};
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 14436de018..e479bdbacd 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -17,7 +17,6 @@
 
 //! SQL Utility Functions
 
-use std::collections::HashMap;
 use std::vec;
 
 use arrow_schema::{
@@ -27,8 +26,8 @@ use datafusion_common::tree_node::{
     Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter,
 };
 use datafusion_common::{
-    exec_err, internal_err, plan_err, Column, DFSchemaRef, DataFusionError, Result,
-    ScalarValue,
+    exec_err, internal_err, plan_err, Column, DFSchemaRef, DataFusionError, HashMap,
+    Result, ScalarValue,
 };
 use datafusion_expr::builder::get_struct_unnested_columns;
 use datafusion_expr::expr::{Alias, GroupingSet, Unnest, WindowFunction};
diff --git a/datafusion/substrait/src/extensions.rs b/datafusion/substrait/src/extensions.rs
index 459d0e0c5a..c74061f2c9 100644
--- a/datafusion/substrait/src/extensions.rs
+++ b/datafusion/substrait/src/extensions.rs
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use datafusion::common::{plan_err, DataFusionError};
-use std::collections::HashMap;
+use datafusion::common::{plan_err, DataFusionError, HashMap};
 use substrait::proto::extensions::simple_extension_declaration::{
     ExtensionFunction, ExtensionType, ExtensionTypeVariation, MappingType,
 };


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to