This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e99e02b9b9 Fix `recursive-protection` feature flag (#13887)
e99e02b9b9 is described below
commit e99e02b9b9093ceb0c13a2dd32a2a89beba47930
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Dec 24 16:06:04 2024 -0500
Fix `recursive-protection` feature flag (#13887)
* Fix recursive-protection feature flag
* rename feature flag to be consistent
* Make default
* taplo format
---
README.md | 2 +-
datafusion-cli/Cargo.lock | 1 +
datafusion-cli/Cargo.toml | 1 +
datafusion/common/Cargo.toml | 3 +-
datafusion/common/src/tree_node.rs | 14 ++---
datafusion/core/Cargo.toml | 8 +++
datafusion/expr/Cargo.toml | 3 +-
datafusion/expr/src/expr_schema.rs | 2 +-
datafusion/expr/src/logical_plan/tree_node.rs | 12 ++---
datafusion/optimizer/Cargo.toml | 3 +-
datafusion/optimizer/src/analyzer/subquery.rs | 2 +-
.../optimizer/src/common_subexpr_eliminate.rs | 2 +-
datafusion/optimizer/src/eliminate_cross_join.rs | 2 +-
.../optimizer/src/optimize_projections/mod.rs | 2 +-
datafusion/physical-optimizer/Cargo.toml | 3 +-
.../physical-optimizer/src/aggregate_statistics.rs | 2 +-
datafusion/sql/Cargo.toml | 4 +-
datafusion/sql/src/expr/mod.rs | 2 +-
datafusion/sql/src/lib.rs | 1 +
datafusion/sql/src/query.rs | 10 ++--
datafusion/sql/src/set_expr.rs | 2 +-
datafusion/sql/src/stack.rs | 63 ++++++++++++++++++++++
22 files changed, 108 insertions(+), 36 deletions(-)
diff --git a/README.md b/README.md
index c2ede4833e..e0fc6854ec 100644
--- a/README.md
+++ b/README.md
@@ -113,7 +113,7 @@ Default features:
- `regex_expressions`: regular expression functions, such as `regexp_match`
- `unicode_expressions`: Include unicode aware functions such as
`character_length`
- `unparser`: enables support to reverse LogicalPlans back into SQL
-- `recursive-protection`: uses
[recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow
protection.
+- `recursive_protection`: uses
[recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow
protection.
Optional features:
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 34505bee2e..1a42673cd3 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -1543,6 +1543,7 @@ dependencies = [
"indexmap",
"itertools",
"log",
+ "recursive",
"regex",
"regex-syntax",
]
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index 4cdc2120a0..e0192037de 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -45,6 +45,7 @@ datafusion = { path = "../datafusion/core", version =
"43.0.0", features = [
"datetime_expressions",
"encoding_expressions",
"parquet",
+ "recursive_protection",
"regex_expressions",
"unicode_expressions",
"compression",
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 918f0cd583..b331a55a98 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -36,12 +36,11 @@ name = "datafusion_common"
path = "src/lib.rs"
[features]
-default = ["recursive-protection"]
avro = ["apache-avro"]
backtrace = []
pyarrow = ["pyo3", "arrow/pyarrow", "parquet"]
force_hash_collisions = []
-recursive-protection = ["dep:recursive"]
+recursive_protection = ["dep:recursive"]
[dependencies]
ahash = { workspace = true }
diff --git a/datafusion/common/src/tree_node.rs
b/datafusion/common/src/tree_node.rs
index 9c59652e0d..c70389b631 100644
--- a/datafusion/common/src/tree_node.rs
+++ b/datafusion/common/src/tree_node.rs
@@ -124,7 +124,7 @@ pub trait TreeNode: Sized {
/// TreeNodeVisitor::f_up(ChildNode2)
/// TreeNodeVisitor::f_up(ParentNode)
/// ```
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn visit<'n, V: TreeNodeVisitor<'n, Node = Self>>(
&'n self,
visitor: &mut V,
@@ -174,7 +174,7 @@ pub trait TreeNode: Sized {
/// TreeNodeRewriter::f_up(ChildNode2)
/// TreeNodeRewriter::f_up(ParentNode)
/// ```
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn rewrite<R: TreeNodeRewriter<Node = Self>>(
self,
rewriter: &mut R,
@@ -197,7 +197,7 @@ pub trait TreeNode: Sized {
&'n self,
mut f: F,
) -> Result<TreeNodeRecursion> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn apply_impl<'n, N: TreeNode, F: FnMut(&'n N) ->
Result<TreeNodeRecursion>>(
node: &'n N,
f: &mut F,
@@ -232,7 +232,7 @@ pub trait TreeNode: Sized {
self,
mut f: F,
) -> Result<Transformed<Self>> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_down_impl<N: TreeNode, F: FnMut(N) ->
Result<Transformed<N>>>(
node: N,
f: &mut F,
@@ -256,7 +256,7 @@ pub trait TreeNode: Sized {
self,
mut f: F,
) -> Result<Transformed<Self>> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_up_impl<N: TreeNode, F: FnMut(N) ->
Result<Transformed<N>>>(
node: N,
f: &mut F,
@@ -371,7 +371,7 @@ pub trait TreeNode: Sized {
mut f_down: FD,
mut f_up: FU,
) -> Result<Transformed<Self>> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_down_up_impl<
N: TreeNode,
FD: FnMut(N) -> Result<Transformed<N>>,
@@ -2349,7 +2349,7 @@ pub(crate) mod tests {
Ok(())
}
- #[cfg(feature = "recursive-protection")]
+ #[cfg(feature = "recursive_protection")]
#[test]
fn test_large_tree() {
let mut item = TestTreeNode::new_leaf("initial".to_string());
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index dca40ab3d6..64ad8f2ba1 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -59,6 +59,7 @@ default = [
"unicode_expressions",
"compression",
"parquet",
+ "recursive_protection",
]
encoding_expressions = ["datafusion-functions/encoding_expressions"]
# Used for testing ONLY: causes all values to hash to the same value (test for
collisions)
@@ -69,6 +70,13 @@ pyarrow = ["datafusion-common/pyarrow", "parquet"]
regex_expressions = [
"datafusion-functions/regex_expressions",
]
+recursive_protection = [
+ "datafusion-common/recursive_protection",
+ "datafusion-expr/recursive_protection",
+ "datafusion-optimizer/recursive_protection",
+ "datafusion-physical-optimizer/recursive_protection",
+ "datafusion-sql/recursive_protection",
+]
serde = ["arrow-schema/serde"]
string_expressions = ["datafusion-functions/string_expressions"]
unicode_expressions = [
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 403a80972c..b4f3f7fb68 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -36,8 +36,7 @@ name = "datafusion_expr"
path = "src/lib.rs"
[features]
-default = ["recursive-protection"]
-recursive-protection = ["dep:recursive"]
+recursive_protection = ["dep:recursive"]
[dependencies]
arrow = { workspace = true }
diff --git a/datafusion/expr/src/expr_schema.rs
b/datafusion/expr/src/expr_schema.rs
index e61904e249..d5c2ac396e 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -99,7 +99,7 @@ impl ExprSchemable for Expr {
/// expression refers to a column that does not exist in the
/// schema, or when the expression is incorrectly typed
/// (e.g. `[utf8] + [bool]`).
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn get_type(&self, schema: &dyn ExprSchema) -> Result<DataType> {
match self {
Expr::Alias(Alias { expr, name, .. }) => match &**expr {
diff --git a/datafusion/expr/src/logical_plan/tree_node.rs
b/datafusion/expr/src/logical_plan/tree_node.rs
index cdc95b84d8..9a6103afd4 100644
--- a/datafusion/expr/src/logical_plan/tree_node.rs
+++ b/datafusion/expr/src/logical_plan/tree_node.rs
@@ -668,7 +668,7 @@ impl LogicalPlan {
/// Visits a plan similarly to [`Self::visit`], including subqueries that
/// may appear in expressions such as `IN (SELECT ...)`.
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
pub fn visit_with_subqueries<V: for<'n> TreeNodeVisitor<'n, Node = Self>>(
&self,
visitor: &mut V,
@@ -687,7 +687,7 @@ impl LogicalPlan {
/// Similarly to [`Self::rewrite`], rewrites this node and its inputs
using `f`,
/// including subqueries that may appear in expressions such as `IN (SELECT
/// ...)`.
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
pub fn rewrite_with_subqueries<R: TreeNodeRewriter<Node = Self>>(
self,
rewriter: &mut R,
@@ -706,7 +706,7 @@ impl LogicalPlan {
&self,
mut f: F,
) -> Result<TreeNodeRecursion> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn apply_with_subqueries_impl<
F: FnMut(&LogicalPlan) -> Result<TreeNodeRecursion>,
>(
@@ -741,7 +741,7 @@ impl LogicalPlan {
self,
mut f: F,
) -> Result<Transformed<Self>> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_down_with_subqueries_impl<
F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
>(
@@ -766,7 +766,7 @@ impl LogicalPlan {
self,
mut f: F,
) -> Result<Transformed<Self>> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_up_with_subqueries_impl<
F: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
>(
@@ -794,7 +794,7 @@ impl LogicalPlan {
mut f_down: FD,
mut f_up: FU,
) -> Result<Transformed<Self>> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_down_up_with_subqueries_impl<
FD: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
FU: FnMut(LogicalPlan) -> Result<Transformed<LogicalPlan>>,
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml
index 3032c67682..ba0dedc576 100644
--- a/datafusion/optimizer/Cargo.toml
+++ b/datafusion/optimizer/Cargo.toml
@@ -36,8 +36,7 @@ name = "datafusion_optimizer"
path = "src/lib.rs"
[features]
-default = ["recursive-protection"]
-recursive-protection = ["dep:recursive"]
+recursive_protection = ["dep:recursive"]
[dependencies]
arrow = { workspace = true }
diff --git a/datafusion/optimizer/src/analyzer/subquery.rs
b/datafusion/optimizer/src/analyzer/subquery.rs
index 0d04efbcf3..7129da85f3 100644
--- a/datafusion/optimizer/src/analyzer/subquery.rs
+++ b/datafusion/optimizer/src/analyzer/subquery.rs
@@ -128,7 +128,7 @@ fn check_correlations_in_subquery(inner_plan: &LogicalPlan)
-> Result<()> {
}
// Recursively check the unsupported outer references in the sub query plan.
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn check_inner_plan(inner_plan: &LogicalPlan, can_contain_outer_ref: bool) ->
Result<()> {
if !can_contain_outer_ref && inner_plan.contains_outer_reference() {
return plan_err!("Accessing outer reference columns is not allowed in
the plan");
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs
b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index 92e6dd1ad4..4b9a83fd3e 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -531,7 +531,7 @@ impl OptimizerRule for CommonSubexprEliminate {
None
}
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn rewrite(
&self,
plan: LogicalPlan,
diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs
b/datafusion/optimizer/src/eliminate_cross_join.rs
index 64d24016f4..d35572e6d3 100644
--- a/datafusion/optimizer/src/eliminate_cross_join.rs
+++ b/datafusion/optimizer/src/eliminate_cross_join.rs
@@ -79,7 +79,7 @@ impl OptimizerRule for EliminateCrossJoin {
true
}
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn rewrite(
&self,
plan: LogicalPlan,
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs
b/datafusion/optimizer/src/optimize_projections/mod.rs
index f6e3eec674..b7dd391586 100644
--- a/datafusion/optimizer/src/optimize_projections/mod.rs
+++ b/datafusion/optimizer/src/optimize_projections/mod.rs
@@ -109,7 +109,7 @@ impl OptimizerRule for OptimizeProjections {
/// columns.
/// - `Ok(None)`: Signal that the given logical plan did not require any
change.
/// - `Err(error)`: An error occurred during the optimization process.
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn optimize_projections(
plan: LogicalPlan,
config: &dyn OptimizerConfig,
diff --git a/datafusion/physical-optimizer/Cargo.toml
b/datafusion/physical-optimizer/Cargo.toml
index c964ca47e6..3454209445 100644
--- a/datafusion/physical-optimizer/Cargo.toml
+++ b/datafusion/physical-optimizer/Cargo.toml
@@ -32,8 +32,7 @@ rust-version = { workspace = true }
workspace = true
[features]
-default = ["recursive-protection"]
-recursive-protection = ["dep:recursive"]
+recursive_protection = ["dep:recursive"]
[dependencies]
arrow = { workspace = true }
diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs
b/datafusion/physical-optimizer/src/aggregate_statistics.rs
index 0849a3d97a..a00bc4b1d5 100644
--- a/datafusion/physical-optimizer/src/aggregate_statistics.rs
+++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs
@@ -41,7 +41,7 @@ impl AggregateStatistics {
}
impl PhysicalOptimizerRule for AggregateStatistics {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn optimize(
&self,
plan: Arc<dyn ExecutionPlan>,
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
index c6500e9742..224c7cb191 100644
--- a/datafusion/sql/Cargo.toml
+++ b/datafusion/sql/Cargo.toml
@@ -36,10 +36,10 @@ name = "datafusion_sql"
path = "src/lib.rs"
[features]
-default = ["unicode_expressions", "unparser", "recursive-protection"]
+default = ["unicode_expressions", "unparser"]
unicode_expressions = []
unparser = []
-recursive-protection = ["dep:recursive"]
+recursive_protection = ["dep:recursive"]
[dependencies]
arrow = { workspace = true }
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 7c4d8dd21d..9b40ebdaf6 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -196,7 +196,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
/// Internal implementation. Use
/// [`Self::sql_expr_to_logical_expr`] to plan exprs.
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn sql_expr_to_logical_expr_internal(
&self,
sql: SQLExpr,
diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs
index a5d5389894..f20560fe7c 100644
--- a/datafusion/sql/src/lib.rs
+++ b/datafusion/sql/src/lib.rs
@@ -43,6 +43,7 @@ mod query;
mod relation;
mod select;
mod set_expr;
+mod stack;
mod statement;
#[cfg(feature = "unparser")]
pub mod unparser;
diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs
index 2e115d140e..9d5a54d90b 100644
--- a/datafusion/sql/src/query.rs
+++ b/datafusion/sql/src/query.rs
@@ -19,6 +19,7 @@ use std::sync::Arc;
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
+use crate::stack::StackGuard;
use datafusion_common::{not_impl_err, Constraints, DFSchema, Result};
use datafusion_expr::expr::Sort;
use datafusion_expr::{
@@ -62,10 +63,11 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
// The functions called from `set_expr_to_plan()` need more
than 128KB
// stack in debug builds as investigated in:
//
https://github.com/apache/datafusion/pull/13310#discussion_r1836813902
- let min_stack_size = recursive::get_minimum_stack_size();
- recursive::set_minimum_stack_size(256 * 1024);
- let plan = self.set_expr_to_plan(other, planner_context)?;
- recursive::set_minimum_stack_size(min_stack_size);
+ let plan = {
+ // scope for dropping _guard
+ let _guard = StackGuard::new(256 * 1024);
+ self.set_expr_to_plan(other, planner_context)
+ }?;
let oby_exprs = to_order_by_exprs(query.order_by)?;
let order_by_rex = self.order_by_to_sort_expr(
oby_exprs,
diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs
index d1569c81d3..75fdbd20e8 100644
--- a/datafusion/sql/src/set_expr.rs
+++ b/datafusion/sql/src/set_expr.rs
@@ -21,7 +21,7 @@ use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier};
impl<S: ContextProvider> SqlToRel<'_, S> {
- #[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+ #[cfg_attr(feature = "recursive_protection", recursive::recursive)]
pub(super) fn set_expr_to_plan(
&self,
set_expr: SetExpr,
diff --git a/datafusion/sql/src/stack.rs b/datafusion/sql/src/stack.rs
new file mode 100644
index 0000000000..b7d5eebdd7
--- /dev/null
+++ b/datafusion/sql/src/stack.rs
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub use inner::StackGuard;
+
+/// A guard that sets the minimum stack size for the current thread to
`min_stack_size` bytes.
+#[cfg(feature = "recursive_protection")]
+mod inner {
+ /// Sets the stack size to `min_stack_size` bytes on call to `new()` and
+ /// resets to the previous value when this structure is dropped.
+ pub struct StackGuard {
+ previous_stack_size: usize,
+ }
+
+ impl StackGuard {
+ /// Sets the stack size to `min_stack_size` bytes on call to `new()`
and
+ /// resets to the previous value when this structure is dropped.
+ pub fn new(min_stack_size: usize) -> Self {
+ let previous_stack_size = recursive::get_minimum_stack_size();
+ recursive::set_minimum_stack_size(min_stack_size);
+ Self {
+ previous_stack_size,
+ }
+ }
+ }
+
+ impl Drop for StackGuard {
+ fn drop(&mut self) {
+ recursive::set_minimum_stack_size(self.previous_stack_size);
+ }
+ }
+}
+
+/// A stub implementation of the stack guard when the recursive protection
+/// feature is not enabled
+#[cfg(not(feature = "recursive_protection"))]
+mod inner {
+ /// A stub implementation of the stack guard when the recursive protection
+ /// feature is not enabled that does nothing
+ pub struct StackGuard;
+
+ impl StackGuard {
+ /// A stub implementation of the stack guard when the recursive
protection
+ /// feature is not enabled
+ pub fn new(_min_stack_size: usize) -> Self {
+ Self
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]