This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 85f3578f5f Minor: Move hash utils to common (#7684)
85f3578f5f is described below

commit 85f3578f5fb47d28a8bc3a7b9be0284b3ced0fcd
Author: Jay Zhan <[email protected]>
AuthorDate: Sat Sep 30 00:51:39 2023 +0800

    Minor: Move hash utils to common (#7684)
    
    * move hash utils to common
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * support backward compatibility
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    ---------
    
    Signed-off-by: jayzhan211 <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion-cli/Cargo.lock                                |  4 ++++
 datafusion/common/Cargo.toml                             |  4 ++++
 datafusion/{physical-expr => common}/src/hash_utils.rs   | 16 ++++++++--------
 datafusion/common/src/lib.rs                             |  1 +
 datafusion/physical-expr/src/expressions/in_list.rs      |  2 +-
 datafusion/physical-expr/src/lib.rs                      |  4 +++-
 .../physical-plan/src/aggregates/group_values/row.rs     |  2 +-
 datafusion/physical-plan/src/lib.rs                      |  3 ++-
 .../physical-plan/src/windows/bounded_window_agg_exec.rs |  3 ++-
 9 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 0ca83452bd..775f8ec87e 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -1122,9 +1122,13 @@ dependencies = [
 name = "datafusion-common"
 version = "31.0.0"
 dependencies = [
+ "ahash",
  "arrow",
  "arrow-array",
+ "arrow-buffer",
+ "arrow-schema",
  "chrono",
+ "half",
  "num_cpus",
  "object_store",
  "parquet",
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index f2b8f1a1e4..b5cdec1be1 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -39,10 +39,14 @@ default = ["parquet"]
 pyarrow = ["pyo3", "arrow/pyarrow"]
 
 [dependencies]
+ahash = { version = "0.8", default-features = false, features = 
["runtime-rng"] }
 apache-avro = { version = "0.16", default-features = false, features = 
["snappy"], optional = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
+arrow-buffer = { workspace = true }
+arrow-schema = { workspace = true }
 chrono = { workspace = true }
+half = { version = "2.1", default-features = false }
 num_cpus = "1.13.0"
 object_store = { version = "0.7.0", default-features = false, optional = true }
 parquet = { workspace = true, optional = true }
diff --git a/datafusion/physical-expr/src/hash_utils.rs 
b/datafusion/common/src/hash_utils.rs
similarity index 98%
rename from datafusion/physical-expr/src/hash_utils.rs
rename to datafusion/common/src/hash_utils.rs
index 379e0eba52..9198461e00 100644
--- a/datafusion/physical-expr/src/hash_utils.rs
+++ b/datafusion/common/src/hash_utils.rs
@@ -17,19 +17,19 @@
 
 //! Functionality used both on logical and physical plans
 
+use std::sync::Arc;
+
 use ahash::RandomState;
 use arrow::array::*;
 use arrow::datatypes::*;
 use arrow::row::Rows;
 use arrow::{downcast_dictionary_array, downcast_primitive_array};
 use arrow_buffer::i256;
-use datafusion_common::{
-    cast::{
-        as_boolean_array, as_generic_binary_array, as_primitive_array, 
as_string_array,
-    },
-    internal_err, DataFusionError, Result,
+
+use crate::cast::{
+    as_boolean_array, as_generic_binary_array, as_primitive_array, 
as_string_array,
 };
-use std::sync::Arc;
+use crate::error::{DataFusionError, Result, _internal_err};
 
 // Combines two hashes into one hash
 #[inline]
@@ -51,7 +51,7 @@ fn hash_null(random_state: &RandomState, hashes_buffer: &'_ 
mut [u64], mul_col:
     }
 }
 
-pub(crate) trait HashValue {
+pub trait HashValue {
     fn hash_one(&self, state: &RandomState) -> u64;
 }
 
@@ -337,7 +337,7 @@ pub fn create_hashes<'a>(
             }
             _ => {
                 // This is internal because we should have caught this before.
-                return internal_err!(
+                return _internal_err!(
                     "Unsupported data type in hasher: {}",
                     col.data_type()
                 );
diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs
index eeb5b26813..71782f6704 100644
--- a/datafusion/common/src/lib.rs
+++ b/datafusion/common/src/lib.rs
@@ -25,6 +25,7 @@ mod error;
 pub mod file_options;
 pub mod format;
 mod functional_dependencies;
+pub mod hash_utils;
 mod join_type;
 pub mod parsers;
 #[cfg(feature = "pyarrow")]
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs 
b/datafusion/physical-expr/src/expressions/in_list.rs
index c92bbbb74f..bdc476f5b3 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -24,7 +24,6 @@ use std::fmt::Debug;
 use std::hash::{Hash, Hasher};
 use std::sync::Arc;
 
-use crate::hash_utils::HashValue;
 use crate::physical_expr::down_cast_any_ref;
 use crate::utils::expr_list_eq_any_order;
 use crate::PhysicalExpr;
@@ -37,6 +36,7 @@ use arrow::datatypes::*;
 use arrow::record_batch::RecordBatch;
 use arrow::util::bit_iterator::BitIndexIterator;
 use arrow::{downcast_dictionary_array, downcast_primitive_array};
+use datafusion_common::hash_utils::HashValue;
 use datafusion_common::{
     cast::{as_boolean_array, as_generic_binary_array, as_string_array},
     internal_err, not_impl_err, DataFusionError, Result, ScalarValue,
diff --git a/datafusion/physical-expr/src/lib.rs 
b/datafusion/physical-expr/src/lib.rs
index e83dee2e6c..48d5f4e130 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -28,7 +28,6 @@ pub mod equivalence;
 pub mod execution_props;
 pub mod expressions;
 pub mod functions;
-pub mod hash_utils;
 pub mod intervals;
 pub mod math_expressions;
 mod partitioning;
@@ -49,6 +48,9 @@ pub mod utils;
 pub mod var_provider;
 pub mod window;
 
+// For backwards compatibility
+pub use datafusion_common::hash_utils;
+
 pub use aggregate::groups_accumulator::{
     EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter,
 };
diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs 
b/datafusion/physical-plan/src/aggregates/group_values/row.rs
index 746537557d..10ff9edb89 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/row.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs
@@ -22,9 +22,9 @@ use arrow::record_batch::RecordBatch;
 use arrow::row::{RowConverter, Rows, SortField};
 use arrow_array::{Array, ArrayRef};
 use arrow_schema::{DataType, SchemaRef};
+use datafusion_common::hash_utils::create_hashes;
 use datafusion_common::{DataFusionError, Result};
 use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
-use datafusion_physical_expr::hash_utils::create_hashes;
 use datafusion_physical_expr::EmitTo;
 use hashbrown::raw::RawTable;
 
diff --git a/datafusion/physical-plan/src/lib.rs 
b/datafusion/physical-plan/src/lib.rs
index 76adf7611d..aca10893db 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -375,10 +375,11 @@ pub mod windows;
 
 use crate::repartition::RepartitionExec;
 use crate::sorts::sort_preserving_merge::SortPreservingMergeExec;
+pub use datafusion_common::hash_utils;
 pub use datafusion_common::utils::project_schema;
 use datafusion_execution::TaskContext;
 pub use datafusion_physical_expr::{
-    expressions, functions, hash_utils, 
ordering_equivalence_properties_helper, udf,
+    expressions, functions, ordering_equivalence_properties_helper, udf,
 };
 
 #[cfg(test)]
diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs 
b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
index 4108b42205..dfef0ddefa 100644
--- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
+++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
@@ -43,6 +43,8 @@ use arrow::{
     datatypes::{Schema, SchemaBuilder, SchemaRef},
     record_batch::RecordBatch,
 };
+
+use datafusion_common::hash_utils::create_hashes;
 use datafusion_common::utils::{
     evaluate_partition_ranges, get_arrayref_at_indices, get_at_indices,
     get_record_batch_at_indices, get_row_at_idx,
@@ -51,7 +53,6 @@ use datafusion_common::{exec_err, plan_err, DataFusionError, 
Result};
 use datafusion_execution::TaskContext;
 use datafusion_expr::window_state::{PartitionBatchState, WindowAggState};
 use datafusion_expr::ColumnarValue;
-use datafusion_physical_expr::hash_utils::create_hashes;
 use datafusion_physical_expr::window::{
     PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState,
 };

Reply via email to