This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 84c9409789 Fix stack overflow calculating projected orderings (#12759)
84c9409789 is described below
commit 84c94097896e9d08e40d63e8c505414ea70e8b29
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Oct 6 14:33:35 2024 -0400
Fix stack overflow calculating projected orderings (#12759)
* Fix stack overflow calculating projected orderings
* fix docs
---
.../physical-expr/src/equivalence/properties.rs | 162 +++++++++++++++------
1 file changed, 121 insertions(+), 41 deletions(-)
diff --git a/datafusion/physical-expr/src/equivalence/properties.rs
b/datafusion/physical-expr/src/equivalence/properties.rs
index 51e85e25e0..1c12f46970 100644
--- a/datafusion/physical-expr/src/equivalence/properties.rs
+++ b/datafusion/physical-expr/src/equivalence/properties.rs
@@ -1295,21 +1295,30 @@ fn construct_prefix_orderings(
relevant_sort_expr: &PhysicalSortExpr,
dependency_map: &DependencyMap,
) -> Vec<LexOrdering> {
+ let mut dep_enumerator = DependencyEnumerator::new();
dependency_map[relevant_sort_expr]
.dependencies
.iter()
- .flat_map(|dep| construct_orderings(dep, dependency_map))
+ .flat_map(|dep| dep_enumerator.construct_orderings(dep,
dependency_map))
.collect()
}
-/// Given a set of relevant dependencies (`relevant_deps`) and a map of
dependencies
-/// (`dependency_map`), this function generates all possible prefix orderings
-/// based on the given dependencies.
+/// Generates all possible orderings where dependencies are satisfied for the
+/// current projection expression.
+///
+/// # Example
+/// If `dependencies` is `a + b ASC` and the dependency map holds dependencies
+/// * `a ASC` --> `[c ASC]`
+/// * `b ASC` --> `[d DESC]`,
+///
+/// This function generates these two sort orders
+/// * `[c ASC, d DESC, a + b ASC]`
+/// * `[d DESC, c ASC, a + b ASC]`
///
/// # Parameters
///
-/// * `dependencies` - A reference to the dependencies.
-/// * `dependency_map` - A reference to the map of dependencies for
expressions.
+/// * `dependencies` - Set of relevant expressions.
+/// * `dependency_map` - Map of dependencies for expressions that may appear
in `dependencies`
///
/// # Returns
///
@@ -1335,11 +1344,6 @@ fn generate_dependency_orderings(
return vec![vec![]];
}
- // Generate all possible orderings where dependencies are satisfied for the
- // current projection expression. For example, if expression is `a + b
ASC`,
- // and the dependency for `a ASC` is `[c ASC]`, the dependency for `b ASC`
- // is `[d DESC]`, then we generate `[c ASC, d DESC, a + b ASC]` and
- // `[d DESC, c ASC, a + b ASC]`.
relevant_prefixes
.into_iter()
.multi_cartesian_product()
@@ -1421,7 +1425,7 @@ struct DependencyNode {
}
impl DependencyNode {
- // Insert dependency to the state (if exists).
+ /// Insert dependency to the state (if exists).
fn insert_dependency(&mut self, dependency: Option<&PhysicalSortExpr>) {
if let Some(dep) = dependency {
self.dependencies.insert(dep.clone());
@@ -1437,38 +1441,69 @@ impl DependencyNode {
type DependencyMap = IndexMap<PhysicalSortExpr, DependencyNode>;
type Dependencies = IndexSet<PhysicalSortExpr>;
-/// This function recursively analyzes the dependencies of the given sort
-/// expression within the given dependency map to construct lexicographical
-/// orderings that include the sort expression and its dependencies.
-///
-/// # Parameters
-///
-/// - `referred_sort_expr`: A reference to the sort expression
(`PhysicalSortExpr`)
-/// for which lexicographical orderings satisfying its dependencies are to be
-/// constructed.
-/// - `dependency_map`: A reference to the `DependencyMap` that contains
-/// dependencies for different `PhysicalSortExpr`s.
-///
-/// # Returns
-///
-/// A vector of lexicographical orderings (`Vec<LexOrdering>`) based on the
given
-/// sort expression and its dependencies.
-fn construct_orderings(
- referred_sort_expr: &PhysicalSortExpr,
- dependency_map: &DependencyMap,
-) -> Vec<LexOrdering> {
- // We are sure that `referred_sort_expr` is inside `dependency_map`.
- let node = &dependency_map[referred_sort_expr];
- // Since we work on intermediate nodes, we are sure `val.target_sort_expr`
- // exists.
- let target_sort_expr = node.target_sort_expr.clone().unwrap();
- if node.dependencies.is_empty() {
- vec![vec![target_sort_expr]]
- } else {
+/// Contains a mapping of all dependencies we have processed for each sort expr
+struct DependencyEnumerator<'a> {
+ /// Maps `expr` --> `[exprs]` that have previously been processed
+ seen: IndexMap<&'a PhysicalSortExpr, IndexSet<&'a PhysicalSortExpr>>,
+}
+
+impl<'a> DependencyEnumerator<'a> {
+ fn new() -> Self {
+ Self {
+ seen: IndexMap::new(),
+ }
+ }
+
+ /// Insert a new dependency,
+ ///
+ /// returns false if the dependency was already in the map
+ /// returns true if the dependency was newly inserted
+ fn insert(
+ &mut self,
+ target: &'a PhysicalSortExpr,
+ dep: &'a PhysicalSortExpr,
+ ) -> bool {
+ self.seen.entry(target).or_default().insert(dep)
+ }
+
+ /// This function recursively analyzes the dependencies of the given sort
+ /// expression within the given dependency map to construct lexicographical
+ /// orderings that include the sort expression and its dependencies.
+ ///
+ /// # Parameters
+ ///
+ /// - `referred_sort_expr`: A reference to the sort expression
(`PhysicalSortExpr`)
+ /// for which lexicographical orderings satisfying its dependencies are
to be
+ /// constructed.
+ /// - `dependency_map`: A reference to the `DependencyMap` that contains
+ /// dependencies for different `PhysicalSortExpr`s.
+ ///
+ /// # Returns
+ ///
+ /// A vector of lexicographical orderings (`Vec<LexOrdering>`) based on
the given
+ /// sort expression and its dependencies.
+ fn construct_orderings(
+ &mut self,
+ referred_sort_expr: &'a PhysicalSortExpr,
+ dependency_map: &'a DependencyMap,
+ ) -> Vec<LexOrdering> {
+ // We are sure that `referred_sort_expr` is inside `dependency_map`.
+ let node = &dependency_map[referred_sort_expr];
+ // Since we work on intermediate nodes, we are sure
`val.target_sort_expr`
+ // exists.
+ let target_sort_expr = node.target_sort_expr.as_ref().unwrap();
+ if node.dependencies.is_empty() {
+ return vec![vec![target_sort_expr.clone()]];
+ };
+
node.dependencies
.iter()
.flat_map(|dep| {
- let mut orderings = construct_orderings(dep, dependency_map);
+ let mut orderings = if self.insert(target_sort_expr, dep) {
+ self.construct_orderings(dep, dependency_map)
+ } else {
+ vec![]
+ };
for ordering in orderings.iter_mut() {
ordering.push(target_sort_expr.clone())
}
@@ -1763,6 +1798,51 @@ mod tests {
Ok(())
}
+ #[test]
+ fn project_equivalence_properties_test_multi() -> Result<()> {
+ // test multiple input orderings with equivalence properties
+ let input_schema = Arc::new(Schema::new(vec![
+ Field::new("a", DataType::Int64, true),
+ Field::new("b", DataType::Int64, true),
+ Field::new("c", DataType::Int64, true),
+ Field::new("d", DataType::Int64, true),
+ ]));
+
+ let mut input_properties =
EquivalenceProperties::new(Arc::clone(&input_schema));
+ // add equivalent ordering [a, b, c, d]
+ input_properties.add_new_ordering(vec![
+ parse_sort_expr("a", &input_schema),
+ parse_sort_expr("b", &input_schema),
+ parse_sort_expr("c", &input_schema),
+ parse_sort_expr("d", &input_schema),
+ ]);
+
+ // add equivalent ordering [a, c, b, d]
+ input_properties.add_new_ordering(vec![
+ parse_sort_expr("a", &input_schema),
+ parse_sort_expr("c", &input_schema),
+ parse_sort_expr("b", &input_schema), // NB b and c are swapped
+ parse_sort_expr("d", &input_schema),
+ ]);
+
+ // simply project all the columns in order
+ let proj_exprs = vec![
+ (col("a", &input_schema)?, "a".to_string()),
+ (col("b", &input_schema)?, "b".to_string()),
+ (col("c", &input_schema)?, "c".to_string()),
+ (col("d", &input_schema)?, "d".to_string()),
+ ];
+ let projection_mapping = ProjectionMapping::try_new(&proj_exprs,
&input_schema)?;
+ let out_properties = input_properties.project(&projection_mapping,
input_schema);
+
+ assert_eq!(
+ out_properties.to_string(),
+ "order: [[a@0 ASC,c@2 ASC,b@1 ASC,d@3 ASC], [a@0 ASC,b@1 ASC,c@2
ASC,d@3 ASC]]"
+ );
+
+ Ok(())
+ }
+
#[test]
fn test_join_equivalence_properties() -> Result<()> {
let schema = create_test_schema()?;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]