This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5d37babf36 chore: Remove usage of `paste` crate (#20946)
5d37babf36 is described below
commit 5d37babf3685cba060f46dd1b9c78507d2c0c048
Author: Bhargava Vadlamani <[email protected]>
AuthorDate: Mon Mar 16 13:40:25 2026 -0700
chore: Remove usage of `paste` crate (#20946)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
- Closes #20853
## Rationale for this change
<!--
Why are you proposing this change? If this is already explained clearly
in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.
-->
## What changes are included in this PR?
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
## Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
## Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
-->
<!--
If there are any breaking changes to public APIs, please add the `api
change` label.
-->
---
.github/workflows/audit.yml | 2 +-
Cargo.lock | 10 -
Cargo.toml | 1 -
datafusion/common/Cargo.toml | 1 -
datafusion/common/src/error.rs | 141 +++--
datafusion/core/Cargo.toml | 1 -
datafusion/core/tests/parquet/page_pruning.rs | 620 +++++++++++---------
datafusion/core/tests/parquet/row_group_pruning.rs | 642 +++++++++++----------
datafusion/expr-common/Cargo.toml | 1 -
datafusion/expr-common/src/interval_arithmetic.rs | 38 +-
datafusion/expr/Cargo.toml | 1 -
datafusion/expr/src/test/function_stub.rs | 2 -
datafusion/functions-aggregate/Cargo.toml | 1 -
datafusion/functions-aggregate/src/macros.rs | 2 -
datafusion/functions-nested/Cargo.toml | 1 -
datafusion/functions-nested/src/macros.rs | 6 -
datafusion/functions-table/Cargo.toml | 1 -
datafusion/functions-table/src/lib.rs | 34 +-
datafusion/functions-window/Cargo.toml | 1 -
datafusion/functions-window/src/cume_dist.rs | 1 +
datafusion/functions-window/src/lead_lag.rs | 2 +
datafusion/functions-window/src/macros.rs | 60 +-
datafusion/functions-window/src/nth_value.rs | 3 +
datafusion/functions-window/src/ntile.rs | 1 +
datafusion/functions-window/src/rank.rs | 3 +
datafusion/functions-window/src/row_number.rs | 1 +
datafusion/physical-expr/Cargo.toml | 1 -
.../physical-expr/src/expressions/negative.rs | 19 +-
datafusion/sql/Cargo.toml | 1 -
datafusion/sql/src/expr/mod.rs | 68 +--
30 files changed, 912 insertions(+), 754 deletions(-)
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index 3652506c32..b1cabc2d00 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -48,4 +48,4 @@ jobs:
- name: Run audit check
# Note: you can ignore specific RUSTSEC issues using the `--ignore`
flag ,for example:
# run: cargo audit --ignore RUSTSEC-2026-0001
- run: cargo audit --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2024-0014
+ run: cargo audit --ignore RUSTSEC-2024-0014
diff --git a/Cargo.lock b/Cargo.lock
index 168d3bd0c1..35660359ce 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1768,7 +1768,6 @@ dependencies = [
"object_store",
"parking_lot",
"parquet",
- "paste",
"pretty_assertions",
"rand 0.9.2",
"rand_distr",
@@ -1910,7 +1909,6 @@ dependencies = [
"log",
"object_store",
"parquet",
- "paste",
"rand 0.9.2",
"recursive",
"sqlparser",
@@ -2166,7 +2164,6 @@ dependencies = [
"indexmap 2.13.0",
"insta",
"itertools 0.14.0",
- "paste",
"recursive",
"serde_json",
"sqlparser",
@@ -2181,7 +2178,6 @@ dependencies = [
"indexmap 2.13.0",
"insta",
"itertools 0.14.0",
- "paste",
]
[[package]]
@@ -2270,7 +2266,6 @@ dependencies = [
"half",
"log",
"num-traits",
- "paste",
"rand 0.9.2",
]
@@ -2308,7 +2303,6 @@ dependencies = [
"itertools 0.14.0",
"itoa",
"log",
- "paste",
"rand 0.9.2",
]
@@ -2323,7 +2317,6 @@ dependencies = [
"datafusion-expr",
"datafusion-physical-plan",
"parking_lot",
- "paste",
]
[[package]]
@@ -2340,7 +2333,6 @@ dependencies = [
"datafusion-physical-expr",
"datafusion-physical-expr-common",
"log",
- "paste",
]
[[package]]
@@ -2406,7 +2398,6 @@ dependencies = [
"insta",
"itertools 0.14.0",
"parking_lot",
- "paste",
"petgraph",
"rand 0.9.2",
"recursive",
@@ -2625,7 +2616,6 @@ dependencies = [
"insta",
"itertools 0.14.0",
"log",
- "paste",
"recursive",
"regex",
"rstest",
diff --git a/Cargo.toml b/Cargo.toml
index a185cd874a..73e8a61d33 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -173,7 +173,6 @@ parquet = { version = "58.0.0", default-features = false,
features = [
"async",
"object_store",
] }
-paste = "1.0.15"
pbjson = { version = "0.9.0" }
pbjson-types = "0.9"
# Should match arrow-flight's version of prost.
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 4441fe9035..36435580b2 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -85,7 +85,6 @@ libc = "0.2.180"
log = { workspace = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true, optional = true, default-features = true }
-paste = { workspace = true }
recursive = { workspace = true, optional = true }
sqlparser = { workspace = true, optional = true }
tokio = { workspace = true }
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index 4f681896df..b7a30f868a 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -903,76 +903,125 @@ macro_rules! assert_ne_or_internal_err {
/// plan_err!("Error {val:?}")
///
/// `NAME_ERR` - macro name for wrapping Err(DataFusionError::*)
+/// `PREFIXED_NAME_ERR` - underscore-prefixed alias for NAME_ERR (e.g.,
_plan_err)
+/// (Needed to avoid compiler error when using macro in the same crate:
`macros from the current crate cannot be referred to by absolute paths`)
/// `NAME_DF_ERR` - macro name for wrapping DataFusionError::*. Needed to
keep backtrace opportunity
/// in construction where DataFusionError::* used directly, like `map_err`,
`ok_or_else`, etc
+/// `PREFIXED_NAME_DF_ERR` - underscore-prefixed alias for NAME_DF_ERR (e.g.,
_plan_datafusion_err).
+/// (Needed to avoid compiler error when using macro in the same crate:
`macros from the current crate cannot be referred to by absolute paths`)
macro_rules! make_error {
- ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { make_error!(@inner
($), $NAME_ERR, $NAME_DF_ERR, $ERR); };
- (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => {
- ::paste::paste!{
- /// Macro wraps `$ERR` to add backtrace feature
- #[macro_export]
- macro_rules! $NAME_DF_ERR {
- ($d($d args:expr),* $d(; diagnostic=$d DIAG:expr)?) => {{
- let err =$crate::DataFusionError::$ERR(
- ::std::format!(
- "{}{}",
- ::std::format!($d($d args),*),
- $crate::DataFusionError::get_back_trace(),
- ).into()
- );
- $d (
- let err = err.with_diagnostic($d DIAG);
- )?
- err
- }
- }
+ ($NAME_ERR:ident, $PREFIXED_NAME_ERR:ident, $NAME_DF_ERR:ident,
$PREFIXED_NAME_DF_ERR:ident, $ERR:ident) => {
+ make_error!(@inner ($), $NAME_ERR, $PREFIXED_NAME_ERR, $NAME_DF_ERR,
$PREFIXED_NAME_DF_ERR, $ERR);
+ };
+ (@inner ($d:tt), $NAME_ERR:ident, $PREFIXED_NAME_ERR:ident,
$NAME_DF_ERR:ident, $PREFIXED_NAME_DF_ERR:ident, $ERR:ident) => {
+ /// Macro wraps `$ERR` to add backtrace feature
+ #[macro_export]
+ macro_rules! $NAME_DF_ERR {
+ ($d($d args:expr),* $d(; diagnostic = $d DIAG:expr)?) => {{
+ let err = $crate::DataFusionError::$ERR(
+ ::std::format!(
+ "{}{}",
+ ::std::format!($d($d args),*),
+ $crate::DataFusionError::get_back_trace(),
+ ).into()
+ );
+ $d (
+ let err = err.with_diagnostic($d DIAG);
+ )?
+ err
+ }}
}
- /// Macro wraps Err(`$ERR`) to add backtrace feature
- #[macro_export]
- macro_rules! $NAME_ERR {
- ($d($d args:expr),* $d(; diagnostic = $d DIAG:expr)?) => {{
- let err = $crate::[<_ $NAME_DF_ERR>]!($d($d args),*);
- $d (
- let err = err.with_diagnostic($d DIAG);
- )?
- Err(err)
-
- }}
- }
-
-
- #[doc(hidden)]
- pub use $NAME_ERR as [<_ $NAME_ERR>];
- #[doc(hidden)]
- pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>];
+ /// Macro wraps Err(`$ERR`) to add backtrace feature
+ #[macro_export]
+ macro_rules! $NAME_ERR {
+ ($d($d args:expr),* $d(; diagnostic = $d DIAG:expr)?) => {{
+ let err = $crate::$PREFIXED_NAME_DF_ERR!($d($d args),*);
+ $d (
+ let err = err.with_diagnostic($d DIAG);
+ )?
+ Err(err)
+ }}
}
+
+ #[doc(hidden)]
+ pub use $NAME_ERR as $PREFIXED_NAME_ERR;
+ #[doc(hidden)]
+ pub use $NAME_DF_ERR as $PREFIXED_NAME_DF_ERR;
};
}
// Exposes a macro to create `DataFusionError::Plan` with optional backtrace
-make_error!(plan_err, plan_datafusion_err, Plan);
+make_error!(
+ plan_err,
+ _plan_err,
+ plan_datafusion_err,
+ _plan_datafusion_err,
+ Plan
+);
// Exposes a macro to create `DataFusionError::Internal` with optional
backtrace
-make_error!(internal_err, internal_datafusion_err, Internal);
+make_error!(
+ internal_err,
+ _internal_err,
+ internal_datafusion_err,
+ _internal_datafusion_err,
+ Internal
+);
// Exposes a macro to create `DataFusionError::NotImplemented` with optional
backtrace
-make_error!(not_impl_err, not_impl_datafusion_err, NotImplemented);
+make_error!(
+ not_impl_err,
+ _not_impl_err,
+ not_impl_datafusion_err,
+ _not_impl_datafusion_err,
+ NotImplemented
+);
// Exposes a macro to create `DataFusionError::Execution` with optional
backtrace
-make_error!(exec_err, exec_datafusion_err, Execution);
+make_error!(
+ exec_err,
+ _exec_err,
+ exec_datafusion_err,
+ _exec_datafusion_err,
+ Execution
+);
// Exposes a macro to create `DataFusionError::Configuration` with optional
backtrace
-make_error!(config_err, config_datafusion_err, Configuration);
+make_error!(
+ config_err,
+ _config_err,
+ config_datafusion_err,
+ _config_datafusion_err,
+ Configuration
+);
// Exposes a macro to create `DataFusionError::Substrait` with optional
backtrace
-make_error!(substrait_err, substrait_datafusion_err, Substrait);
+make_error!(
+ substrait_err,
+ _substrait_err,
+ substrait_datafusion_err,
+ _substrait_datafusion_err,
+ Substrait
+);
// Exposes a macro to create `DataFusionError::ResourcesExhausted` with
optional backtrace
-make_error!(resources_err, resources_datafusion_err, ResourcesExhausted);
+make_error!(
+ resources_err,
+ _resources_err,
+ resources_datafusion_err,
+ _resources_datafusion_err,
+ ResourcesExhausted
+);
// Exposes a macro to create `DataFusionError::Ffi` with optional backtrace
-make_error!(ffi_err, ffi_datafusion_err, Ffi);
+make_error!(
+ ffi_err,
+ _ffi_err,
+ ffi_datafusion_err,
+ _ffi_datafusion_err,
+ Ffi
+);
// Exposes a macro to create `DataFusionError::SQL` with optional backtrace
#[macro_export]
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index be507e0691..326b791a2f 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -172,7 +172,6 @@ bytes = { workspace = true }
env_logger = { workspace = true }
glob = { workspace = true }
insta = { workspace = true }
-paste = { workspace = true }
pretty_assertions = "1.0"
rand = { workspace = true, features = ["small_rng"] }
rand_distr = "0.5"
diff --git a/datafusion/core/tests/parquet/page_pruning.rs
b/datafusion/core/tests/parquet/page_pruning.rs
index 6d49e0bcc6..a41803191a 100644
--- a/datafusion/core/tests/parquet/page_pruning.rs
+++ b/datafusion/core/tests/parquet/page_pruning.rs
@@ -366,281 +366,367 @@ async fn prune_date64() {
}
macro_rules! int_tests {
- ($bits:expr) => {
- paste::item! {
- #[tokio::test]
- // null count min
max
- // page-0 0 -5
-1
- // page-1 0 -4
0
- // page-2 0 0
4
- // page-3 0 5
9
- async fn [<prune_int $bits _lt>]() {
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where i{} < 1", $bits),
- Some(0),
- Some(5),
- 11,
- 5,
- )
- .await;
- // result of sql "SELECT * FROM t where i < 1" is same as
- // "SELECT * FROM t where -i > -1"
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where -i{} > -1", $bits),
- Some(0),
- Some(5),
- 11,
- 5,
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _gt >]() {
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where i{} > 8", $bits),
- Some(0),
- Some(15),
- 1,
- 5,
- )
- .await;
-
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where -i{} < -8", $bits),
- Some(0),
- Some(15),
- 1,
- 5,
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _eq >]() {
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where i{} = 1", $bits),
- Some(0),
- Some(15),
- 1,
- 5
- )
- .await;
- }
- #[tokio::test]
- async fn [<prune_int $bits _scalar_fun_and_eq >]() {
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where abs(i{}) = 1 and i{} =
1", $bits, $bits),
- Some(0),
- Some(15),
- 1,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _scalar_fun >]() {
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where abs(i{}) = 1", $bits),
- Some(0),
- Some(0),
- 3,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _complex_expr>]() {
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where i{}+1 = 1", $bits),
- Some(0),
- Some(0),
- 2,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _complex_expr_subtract >]() {
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where 1-i{} > 1", $bits),
- Some(0),
- Some(0),
- 9,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _eq_in_list >]() {
- // result of sql "SELECT * FROM t where in (1)"
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where i{} in (1)", $bits),
- Some(0),
- Some(15),
- 1,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _eq_in_list_negated >]() {
- // result of sql "SELECT * FROM t where not in (1)" prune
nothing
- test_prune(
- Scenario::Int,
- &format!("SELECT * FROM t where i{} not in (1)", $bits),
- Some(0),
- Some(0),
- 19,
- 5
- )
- .await;
- }
+ ($bits:expr, $fn_lt:ident, $fn_gt:ident, $fn_eq:ident,
$fn_scalar_fun_and_eq:ident, $fn_scalar_fun:ident, $fn_complex_expr:ident,
$fn_complex_expr_subtract:ident, $fn_eq_in_list:ident,
$fn_eq_in_list_negated:ident) => {
+ #[tokio::test]
+ // null count min
max
+ // page-0 0 -5
-1
+ // page-1 0 -4
0
+ // page-2 0 0
4
+ // page-3 0 5
9
+ async fn $fn_lt() {
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where i{} < 1", $bits),
+ Some(0),
+ Some(5),
+ 11,
+ 5,
+ )
+ .await;
+ // result of sql "SELECT * FROM t where i < 1" is same as
+ // "SELECT * FROM t where -i > -1"
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where -i{} > -1", $bits),
+ Some(0),
+ Some(5),
+ 11,
+ 5,
+ )
+ .await;
}
- }
+
+ #[tokio::test]
+ async fn $fn_gt() {
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where i{} > 8", $bits),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where -i{} < -8", $bits),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq() {
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where i{} = 1", $bits),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+ #[tokio::test]
+ async fn $fn_scalar_fun_and_eq() {
+ test_prune(
+ Scenario::Int,
+ &format!(
+ "SELECT * FROM t where abs(i{}) = 1 and i{} = 1",
+ $bits, $bits
+ ),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_scalar_fun() {
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where abs(i{}) = 1", $bits),
+ Some(0),
+ Some(0),
+ 3,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_complex_expr() {
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where i{}+1 = 1", $bits),
+ Some(0),
+ Some(0),
+ 2,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_complex_expr_subtract() {
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where 1-i{} > 1", $bits),
+ Some(0),
+ Some(0),
+ 9,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list() {
+ // result of sql "SELECT * FROM t where in (1)"
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where i{} in (1)", $bits),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list_negated() {
+ // result of sql "SELECT * FROM t where not in (1)" prune nothing
+ test_prune(
+ Scenario::Int,
+ &format!("SELECT * FROM t where i{} not in (1)", $bits),
+ Some(0),
+ Some(0),
+ 19,
+ 5,
+ )
+ .await;
+ }
+ };
}
-int_tests!(8);
-int_tests!(16);
-int_tests!(32);
-int_tests!(64);
+int_tests!(
+ 8,
+ prune_int8_lt,
+ prune_int8_gt,
+ prune_int8_eq,
+ prune_int8_scalar_fun_and_eq,
+ prune_int8_scalar_fun,
+ prune_int8_complex_expr,
+ prune_int8_complex_expr_subtract,
+ prune_int8_eq_in_list,
+ prune_int8_eq_in_list_negated
+);
+int_tests!(
+ 16,
+ prune_int16_lt,
+ prune_int16_gt,
+ prune_int16_eq,
+ prune_int16_scalar_fun_and_eq,
+ prune_int16_scalar_fun,
+ prune_int16_complex_expr,
+ prune_int16_complex_expr_subtract,
+ prune_int16_eq_in_list,
+ prune_int16_eq_in_list_negated
+);
+int_tests!(
+ 32,
+ prune_int32_lt,
+ prune_int32_gt,
+ prune_int32_eq,
+ prune_int32_scalar_fun_and_eq,
+ prune_int32_scalar_fun,
+ prune_int32_complex_expr,
+ prune_int32_complex_expr_subtract,
+ prune_int32_eq_in_list,
+ prune_int32_eq_in_list_negated
+);
+int_tests!(
+ 64,
+ prune_int64_lt,
+ prune_int64_gt,
+ prune_int64_eq,
+ prune_int64_scalar_fun_and_eq,
+ prune_int64_scalar_fun,
+ prune_int64_complex_expr,
+ prune_int64_complex_expr_subtract,
+ prune_int64_eq_in_list,
+ prune_int64_eq_in_list_negated
+);
macro_rules! uint_tests {
- ($bits:expr) => {
- paste::item! {
- #[tokio::test]
- // null count min
max
- // page-0 0 0
4
- // page-1 0 1
5
- // page-2 0 5
9
- // page-3 0 250
254
- async fn [<prune_uint $bits _lt>]() {
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where u{} < 6", $bits),
- Some(0),
- Some(5),
- 11,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _gt >]() {
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where u{} > 253", $bits),
- Some(0),
- Some(15),
- 1,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _eq >]() {
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where u{} = 6", $bits),
- Some(0),
- Some(15),
- 1,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _scalar_fun_and_eq >]() {
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where power(u{}, 2) = 36 and u{}
= 6", $bits, $bits),
- Some(0),
- Some(15),
- 1,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _scalar_fun >]() {
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where power(u{}, 2) = 25",
$bits),
- Some(0),
- Some(0),
- 2,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _complex_expr>]() {
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where u{}+1 = 6", $bits),
- Some(0),
- Some(0),
- 2,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _eq_in_list >]() {
- // result of sql "SELECT * FROM t where in (1)"
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where u{} in (6)", $bits),
- Some(0),
- Some(15),
- 1,
- 5
- )
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _eq_in_list_negated >]() {
- // result of sql "SELECT * FROM t where not in (6)" prune
nothing
- test_prune(
- Scenario::UInt,
- &format!("SELECT * FROM t where u{} not in (6)", $bits),
- Some(0),
- Some(0),
- 19,
- 5
- )
- .await;
- }
+ ($bits:expr, $fn_lt:ident, $fn_gt:ident, $fn_eq:ident,
$fn_scalar_fun_and_eq:ident, $fn_scalar_fun:ident, $fn_complex_expr:ident,
$fn_eq_in_list:ident, $fn_eq_in_list_negated:ident) => {
+ #[tokio::test]
+ // null count min
max
+ // page-0 0 0
4
+ // page-1 0 1
5
+ // page-2 0 5
9
+ // page-3 0 250
254
+ async fn $fn_lt() {
+ test_prune(
+ Scenario::UInt,
+ &format!("SELECT * FROM t where u{} < 6", $bits),
+ Some(0),
+ Some(5),
+ 11,
+ 5,
+ )
+ .await;
}
- }
+
+ #[tokio::test]
+ async fn $fn_gt() {
+ test_prune(
+ Scenario::UInt,
+ &format!("SELECT * FROM t where u{} > 253", $bits),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq() {
+ test_prune(
+ Scenario::UInt,
+ &format!("SELECT * FROM t where u{} = 6", $bits),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_scalar_fun_and_eq() {
+ test_prune(
+ Scenario::UInt,
+ &format!(
+ "SELECT * FROM t where power(u{}, 2) = 36 and u{} = 6",
+ $bits, $bits
+ ),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_scalar_fun() {
+ test_prune(
+ Scenario::UInt,
+ &format!("SELECT * FROM t where power(u{}, 2) = 25", $bits),
+ Some(0),
+ Some(0),
+ 2,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_complex_expr() {
+ test_prune(
+ Scenario::UInt,
+ &format!("SELECT * FROM t where u{}+1 = 6", $bits),
+ Some(0),
+ Some(0),
+ 2,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list() {
+ // result of sql "SELECT * FROM t where in (6)"
+ test_prune(
+ Scenario::UInt,
+ &format!("SELECT * FROM t where u{} in (6)", $bits),
+ Some(0),
+ Some(15),
+ 1,
+ 5,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list_negated() {
+ // result of sql "SELECT * FROM t where not in (6)" prune nothing
+ test_prune(
+ Scenario::UInt,
+ &format!("SELECT * FROM t where u{} not in (6)", $bits),
+ Some(0),
+ Some(0),
+ 19,
+ 5,
+ )
+ .await;
+ }
+ };
}
-uint_tests!(8);
-uint_tests!(16);
-uint_tests!(32);
-uint_tests!(64);
+uint_tests!(
+ 8,
+ prune_uint8_lt,
+ prune_uint8_gt,
+ prune_uint8_eq,
+ prune_uint8_scalar_fun_and_eq,
+ prune_uint8_scalar_fun,
+ prune_uint8_complex_expr,
+ prune_uint8_eq_in_list,
+ prune_uint8_eq_in_list_negated
+);
+uint_tests!(
+ 16,
+ prune_uint16_lt,
+ prune_uint16_gt,
+ prune_uint16_eq,
+ prune_uint16_scalar_fun_and_eq,
+ prune_uint16_scalar_fun,
+ prune_uint16_complex_expr,
+ prune_uint16_eq_in_list,
+ prune_uint16_eq_in_list_negated
+);
+uint_tests!(
+ 32,
+ prune_uint32_lt,
+ prune_uint32_gt,
+ prune_uint32_eq,
+ prune_uint32_scalar_fun_and_eq,
+ prune_uint32_scalar_fun,
+ prune_uint32_complex_expr,
+ prune_uint32_eq_in_list,
+ prune_uint32_eq_in_list_negated
+);
+uint_tests!(
+ 64,
+ prune_uint64_lt,
+ prune_uint64_gt,
+ prune_uint64_eq,
+ prune_uint64_scalar_fun_and_eq,
+ prune_uint64_scalar_fun,
+ prune_uint64_complex_expr,
+ prune_uint64_eq_in_list,
+ prune_uint64_eq_in_list_negated
+);
#[tokio::test]
// null count min
max
diff --git a/datafusion/core/tests/parquet/row_group_pruning.rs
b/datafusion/core/tests/parquet/row_group_pruning.rs
index 445ae7e97f..3ec3541af9 100644
--- a/datafusion/core/tests/parquet/row_group_pruning.rs
+++ b/datafusion/core/tests/parquet/row_group_pruning.rs
@@ -399,321 +399,365 @@ async fn prune_disabled() {
// https://github.com/apache/datafusion/issues/9779 bug so that tests pass
// if and only if Bloom filters on Int8 and Int16 columns are still buggy.
macro_rules! int_tests {
- ($bits:expr) => {
- paste::item! {
- #[tokio::test]
- async fn [<prune_int $bits _lt >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where i{} < 1",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(3))
- .with_pruned_by_stats(Some(1))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(3))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(11)
- .test_row_group_prune()
- .await;
-
- // result of sql "SELECT * FROM t where i < 1" is same as
- // "SELECT * FROM t where -i > -1"
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where -i{} > -1",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(3))
- .with_pruned_by_stats(Some(1))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(3))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(11)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _eq >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where i{} = 1",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(1))
- .with_pruned_by_stats(Some(3))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(1))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(1)
- .test_row_group_prune()
- .await;
- }
- #[tokio::test]
- async fn [<prune_int $bits _scalar_fun_and_eq >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where abs(i{}) = 1
and i{} = 1", $bits, $bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(1))
- .with_pruned_by_stats(Some(3))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(1))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(1)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _scalar_fun >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where abs(i{}) = 1",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(4))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(4))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(3)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _complex_expr >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where i{}+1 = 1",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(4))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(4))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(2)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _complex_expr_subtract >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where 1-i{} > 1",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(4))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(4))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(9)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _eq_in_list >]() {
- // result of sql "SELECT * FROM t where in (1)"
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where i{} in (1)",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(1))
- .with_pruned_by_stats(Some(3))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(1))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(1)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _eq_in_list_2 >]() {
- // result of sql "SELECT * FROM t where in (1000)", prune all
- // test whether statistics works
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where i{} in (100)",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(0))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(1))
- .with_matched_by_bloom_filter(Some(0))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(0)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_int $bits _eq_in_list_negated >]() {
- // result of sql "SELECT * FROM t where not in (1)" prune
nothing
- RowGroupPruningTest::new()
- .with_scenario(Scenario::Int)
- .with_query(&format!("SELECT * FROM t where i{} not in
(1)", $bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(4))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(4))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(19)
- .test_row_group_prune()
- .await;
- }
+ ($bits:expr, $fn_lt:ident, $fn_eq:ident, $fn_scalar_fun_and_eq:ident,
$fn_scalar_fun:ident, $fn_complex_expr:ident, $fn_complex_expr_subtract:ident,
$fn_eq_in_list:ident, $fn_eq_in_list_2:ident, $fn_eq_in_list_negated:ident) => {
+ #[tokio::test]
+ async fn $fn_lt() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where i{} < 1", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(3))
+ .with_pruned_by_stats(Some(1))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(3))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(11)
+ .test_row_group_prune()
+ .await;
+
+ // result of sql "SELECT * FROM t where i < 1" is same as
+ // "SELECT * FROM t where -i > -1"
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where -i{} > -1", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(3))
+ .with_pruned_by_stats(Some(1))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(3))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(11)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where i{} = 1", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(1))
+ .with_pruned_by_stats(Some(3))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(1))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(1)
+ .test_row_group_prune()
+ .await;
+ }
+ #[tokio::test]
+ async fn $fn_scalar_fun_and_eq() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!(
+ "SELECT * FROM t where abs(i{}) = 1 and i{} = 1",
+ $bits, $bits
+ ))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(1))
+ .with_pruned_by_stats(Some(3))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(1))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(1)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_scalar_fun() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where abs(i{}) = 1",
$bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(4))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(4))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(3)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_complex_expr() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where i{}+1 = 1", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(4))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(4))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(2)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_complex_expr_subtract() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where 1-i{} > 1", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(4))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(4))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(9)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list() {
+ // result of sql "SELECT * FROM t where in (1)"
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where i{} in (1)",
$bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(1))
+ .with_pruned_by_stats(Some(3))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(1))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(1)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list_2() {
+ // result of sql "SELECT * FROM t where in (100)", prune all
+ // test whether statistics work
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where i{} in (100)",
$bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(0))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(1))
+ .with_matched_by_bloom_filter(Some(0))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(0)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list_negated() {
+ // result of sql "SELECT * FROM t where not in (1)" prune nothing
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::Int)
+ .with_query(&format!("SELECT * FROM t where i{} not in (1)",
$bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(4))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(4))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(19)
+ .test_row_group_prune()
+ .await;
}
};
}
// int8/int16 are incorrect: https://github.com/apache/datafusion/issues/9779
-int_tests!(32);
-int_tests!(64);
+int_tests!(
+ 32,
+ prune_int32_lt,
+ prune_int32_eq,
+ prune_int32_scalar_fun_and_eq,
+ prune_int32_scalar_fun,
+ prune_int32_complex_expr,
+ prune_int32_complex_expr_subtract,
+ prune_int32_eq_in_list,
+ prune_int32_eq_in_list_2,
+ prune_int32_eq_in_list_negated
+);
+int_tests!(
+ 64,
+ prune_int64_lt,
+ prune_int64_eq,
+ prune_int64_scalar_fun_and_eq,
+ prune_int64_scalar_fun,
+ prune_int64_complex_expr,
+ prune_int64_complex_expr_subtract,
+ prune_int64_eq_in_list,
+ prune_int64_eq_in_list_2,
+ prune_int64_eq_in_list_negated
+);
// $bits: number of bits of the integer to test (8, 16, 32, 64)
// $correct_bloom_filters: if false, replicates the
// https://github.com/apache/datafusion/issues/9779 bug so that tests pass
// if and only if Bloom filters on UInt8 and UInt16 columns are still buggy.
macro_rules! uint_tests {
- ($bits:expr) => {
- paste::item! {
- #[tokio::test]
- async fn [<prune_uint $bits _lt >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where u{} < 6",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(3))
- .with_pruned_by_stats(Some(1))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(3))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(11)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _eq >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where u{} = 6",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(1))
- .with_pruned_by_stats(Some(3))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(1))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(1)
- .test_row_group_prune()
- .await;
- }
- #[tokio::test]
- async fn [<prune_uint $bits _scalar_fun_and_eq >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where power(u{}, 2)
= 36 and u{} = 6", $bits, $bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(1))
- .with_pruned_by_stats(Some(3))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(1))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(1)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _scalar_fun >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where power(u{}, 2)
= 25", $bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(4))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(4))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(2)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _complex_expr >]() {
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where u{}+1 = 6",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(4))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(4))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(2)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _eq_in_list >]() {
- // result of sql "SELECT * FROM t where in (1)"
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where u{} in (6)",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(1))
- .with_pruned_by_stats(Some(3))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(1))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(1)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _eq_in_list_2 >]() {
- // result of sql "SELECT * FROM t where in (1000)", prune all
- // test whether statistics works
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where u{} in (100)",
$bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(0))
- .with_pruned_by_stats(Some(4))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(0))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(0)
- .test_row_group_prune()
- .await;
- }
-
- #[tokio::test]
- async fn [<prune_uint $bits _eq_in_list_negated >]() {
- // result of sql "SELECT * FROM t where not in (1)" prune
nothing
- RowGroupPruningTest::new()
- .with_scenario(Scenario::UInt)
- .with_query(&format!("SELECT * FROM t where u{} not in
(6)", $bits))
- .with_expected_errors(Some(0))
- .with_matched_by_stats(Some(4))
- .with_pruned_by_stats(Some(0))
- .with_pruned_files(Some(0))
- .with_matched_by_bloom_filter(Some(4))
- .with_pruned_by_bloom_filter(Some(0))
- .with_expected_rows(19)
- .test_row_group_prune()
- .await;
- }
+ ($bits:expr, $fn_lt:ident, $fn_eq:ident, $fn_scalar_fun_and_eq:ident,
$fn_scalar_fun:ident, $fn_complex_expr:ident, $fn_eq_in_list:ident,
$fn_eq_in_list_2:ident, $fn_eq_in_list_negated:ident) => {
+ #[tokio::test]
+ async fn $fn_lt() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!("SELECT * FROM t where u{} < 6", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(3))
+ .with_pruned_by_stats(Some(1))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(3))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(11)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!("SELECT * FROM t where u{} = 6", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(1))
+ .with_pruned_by_stats(Some(3))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(1))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(1)
+ .test_row_group_prune()
+ .await;
+ }
+ #[tokio::test]
+ async fn $fn_scalar_fun_and_eq() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!(
+ "SELECT * FROM t where power(u{}, 2) = 36 and u{} = 6",
+ $bits, $bits
+ ))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(1))
+ .with_pruned_by_stats(Some(3))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(1))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(1)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_scalar_fun() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!("SELECT * FROM t where power(u{}, 2) =
25", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(4))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(4))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(2)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_complex_expr() {
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!("SELECT * FROM t where u{}+1 = 6", $bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(4))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(4))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(2)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list() {
+ // result of sql "SELECT * FROM t where u{bits} in (6)"
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!("SELECT * FROM t where u{} in (6)",
$bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(1))
+ .with_pruned_by_stats(Some(3))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(1))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(1)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list_2() {
+ // result of sql "SELECT * FROM t where u{bits} in (100)", prune all
+ // test whether statistics works
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!("SELECT * FROM t where u{} in (100)",
$bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(0))
+ .with_pruned_by_stats(Some(4))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(0))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(0)
+ .test_row_group_prune()
+ .await;
+ }
+
+ #[tokio::test]
+ async fn $fn_eq_in_list_negated() {
+ // result of sql "SELECT * FROM t where u{bits} not in (6)" prune nothing
+ RowGroupPruningTest::new()
+ .with_scenario(Scenario::UInt)
+ .with_query(&format!("SELECT * FROM t where u{} not in (6)",
$bits))
+ .with_expected_errors(Some(0))
+ .with_matched_by_stats(Some(4))
+ .with_pruned_by_stats(Some(0))
+ .with_pruned_files(Some(0))
+ .with_matched_by_bloom_filter(Some(4))
+ .with_pruned_by_bloom_filter(Some(0))
+ .with_expected_rows(19)
+ .test_row_group_prune()
+ .await;
}
};
}
// uint8/uint16 are incorrect: https://github.com/apache/datafusion/issues/9779
-uint_tests!(32);
-uint_tests!(64);
+uint_tests!(
+ 32,
+ prune_uint32_lt,
+ prune_uint32_eq,
+ prune_uint32_scalar_fun_and_eq,
+ prune_uint32_scalar_fun,
+ prune_uint32_complex_expr,
+ prune_uint32_eq_in_list,
+ prune_uint32_eq_in_list_2,
+ prune_uint32_eq_in_list_negated
+);
+uint_tests!(
+ 64,
+ prune_uint64_lt,
+ prune_uint64_eq,
+ prune_uint64_scalar_fun_and_eq,
+ prune_uint64_scalar_fun,
+ prune_uint64_complex_expr,
+ prune_uint64_eq_in_list,
+ prune_uint64_eq_in_list_2,
+ prune_uint64_eq_in_list_negated
+);
#[tokio::test]
async fn prune_int32_eq_large_in_list() {
diff --git a/datafusion/expr-common/Cargo.toml
b/datafusion/expr-common/Cargo.toml
index d66d8ee858..072c8f14da 100644
--- a/datafusion/expr-common/Cargo.toml
+++ b/datafusion/expr-common/Cargo.toml
@@ -45,7 +45,6 @@ arrow = { workspace = true }
datafusion-common = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
-paste = { workspace = true }
[dev-dependencies]
insta = { workspace = true }
diff --git a/datafusion/expr-common/src/interval_arithmetic.rs
b/datafusion/expr-common/src/interval_arithmetic.rs
index f93ef3b795..0f88723d11 100644
--- a/datafusion/expr-common/src/interval_arithmetic.rs
+++ b/datafusion/expr-common/src/interval_arithmetic.rs
@@ -37,7 +37,7 @@ use datafusion_common::{
};
macro_rules! get_extreme_value {
- ($extreme:ident, $value:expr) => {
+ ($extreme:ident, $DECIMAL128_ARRAY:ident, $DECIMAL256_ARRAY:ident,
$value:expr) => {
match $value {
DataType::UInt8 => ScalarValue::UInt8(Some(u8::$extreme)),
DataType::UInt16 => ScalarValue::UInt16(Some(u16::$extreme)),
@@ -83,18 +83,12 @@ macro_rules! get_extreme_value {
ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::$extreme))
}
DataType::Decimal128(precision, scale) => ScalarValue::Decimal128(
- Some(
- paste::paste! {[<$extreme _DECIMAL128_FOR_EACH_PRECISION>]}
- [*precision as usize],
- ),
+ Some($DECIMAL128_ARRAY[*precision as usize]),
*precision,
*scale,
),
DataType::Decimal256(precision, scale) => ScalarValue::Decimal256(
- Some(
- paste::paste! {[<$extreme _DECIMAL256_FOR_EACH_PRECISION>]}
- [*precision as usize],
- ),
+ Some($DECIMAL256_ARRAY[*precision as usize]),
*precision,
*scale,
),
@@ -1162,10 +1156,20 @@ fn handle_overflow<const UPPER: bool>(
match (UPPER, positive_sign) {
(true, true) | (false, false) => ScalarValue::try_from(dt).unwrap(),
(true, false) => {
- get_extreme_value!(MIN, dt)
+ get_extreme_value!(
+ MIN,
+ MIN_DECIMAL128_FOR_EACH_PRECISION,
+ MIN_DECIMAL256_FOR_EACH_PRECISION,
+ dt
+ )
}
(false, true) => {
- get_extreme_value!(MAX, dt)
+ get_extreme_value!(
+ MAX,
+ MAX_DECIMAL128_FOR_EACH_PRECISION,
+ MAX_DECIMAL256_FOR_EACH_PRECISION,
+ dt
+ )
}
}
}
@@ -4218,12 +4222,8 @@ mod tests {
}
macro_rules! capture_mode_change {
- ($TYPE:ty) => {
- paste::item! {
- capture_mode_change_helper!([<capture_mode_change_ $TYPE>],
- [<create_interval_ $TYPE>],
- $TYPE);
- }
+ ($TYPE:ty, $TEST_FN_NAME:ident, $CREATE_FN_NAME:ident) => {
+ capture_mode_change_helper!($TEST_FN_NAME, $CREATE_FN_NAME, $TYPE);
};
}
@@ -4251,8 +4251,8 @@ mod tests {
};
}
- capture_mode_change!(f32);
- capture_mode_change!(f64);
+ capture_mode_change!(f32, capture_mode_change_f32, create_interval_f32);
+ capture_mode_change!(f64, capture_mode_change_f64, create_interval_f64);
#[cfg(all(
any(target_arch = "x86_64", target_arch = "aarch64"),
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 75aa59595b..6990714585 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -57,7 +57,6 @@ datafusion-functions-window-common = { workspace = true }
datafusion-physical-expr-common = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
-paste = { workspace = true }
recursive = { workspace = true, optional = true }
serde_json = { workspace = true }
sqlparser = { workspace = true, optional = true }
diff --git a/datafusion/expr/src/test/function_stub.rs
b/datafusion/expr/src/test/function_stub.rs
index d784169d54..26ac16d90d 100644
--- a/datafusion/expr/src/test/function_stub.rs
+++ b/datafusion/expr/src/test/function_stub.rs
@@ -41,7 +41,6 @@ use crate::{
macro_rules! create_func {
($UDAF:ty, $AGGREGATE_UDF_FN:ident) => {
- paste::paste! {
#[doc = concat!("AggregateFunction that returns a
[AggregateUDF](crate::AggregateUDF) for [`", stringify!($UDAF), "`]")]
pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc<crate::AggregateUDF> {
// Singleton instance of [$UDAF], ensures the UDAF is only
created once
@@ -51,7 +50,6 @@ macro_rules! create_func {
});
std::sync::Arc::clone(&INSTANCE)
}
- }
}
}
diff --git a/datafusion/functions-aggregate/Cargo.toml
b/datafusion/functions-aggregate/Cargo.toml
index 39337e44bb..1ca494e38e 100644
--- a/datafusion/functions-aggregate/Cargo.toml
+++ b/datafusion/functions-aggregate/Cargo.toml
@@ -54,7 +54,6 @@ datafusion-physical-expr-common = { workspace = true }
half = { workspace = true }
log = { workspace = true }
num-traits = { workspace = true }
-paste = { workspace = true }
[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
diff --git a/datafusion/functions-aggregate/src/macros.rs
b/datafusion/functions-aggregate/src/macros.rs
index 6c6bf72838..0c919a1e5e 100644
--- a/datafusion/functions-aggregate/src/macros.rs
+++ b/datafusion/functions-aggregate/src/macros.rs
@@ -67,7 +67,6 @@ macro_rules! create_func {
create_func!($UDAF, $AGGREGATE_UDF_FN, <$UDAF>::default());
};
($UDAF:ty, $AGGREGATE_UDF_FN:ident, $CREATE:expr) => {
- paste::paste! {
#[doc = concat!("AggregateFunction that returns a
[`AggregateUDF`](datafusion_expr::AggregateUDF) for [`", stringify!($UDAF),
"`]")]
pub fn $AGGREGATE_UDF_FN() ->
std::sync::Arc<datafusion_expr::AggregateUDF> {
// Singleton instance of [$UDAF], ensures the UDAF is only
created once
@@ -76,7 +75,6 @@ macro_rules! create_func {
std::sync::Arc::new(datafusion_expr::AggregateUDF::from($CREATE))
});
std::sync::Arc::clone(&INSTANCE)
- }
}
}
}
diff --git a/datafusion/functions-nested/Cargo.toml
b/datafusion/functions-nested/Cargo.toml
index 0fdb69e6e7..5fce3e854e 100644
--- a/datafusion/functions-nested/Cargo.toml
+++ b/datafusion/functions-nested/Cargo.toml
@@ -61,7 +61,6 @@ hashbrown = { workspace = true }
itertools = { workspace = true, features = ["use_std"] }
itoa = { workspace = true }
log = { workspace = true }
-paste = { workspace = true }
[dev-dependencies]
criterion = { workspace = true, features = ["async_tokio"] }
diff --git a/datafusion/functions-nested/src/macros.rs
b/datafusion/functions-nested/src/macros.rs
index 5380f6b127..5f12113150 100644
--- a/datafusion/functions-nested/src/macros.rs
+++ b/datafusion/functions-nested/src/macros.rs
@@ -50,7 +50,6 @@ macro_rules! make_udf_expr_and_func {
make_udf_expr_and_func!($UDF, $EXPR_FN, $($arg)*, $DOC,
$SCALAR_UDF_FN, $UDF::new);
};
($UDF:ident, $EXPR_FN:ident, $($arg:ident)*, $DOC:expr,
$SCALAR_UDF_FN:ident, $CTOR:path) => {
- paste::paste! {
// "fluent expr_fn" style function
#[doc = $DOC]
pub fn $EXPR_FN($($arg: datafusion_expr::Expr),*) ->
datafusion_expr::Expr {
@@ -60,13 +59,11 @@ macro_rules! make_udf_expr_and_func {
))
}
create_func!($UDF, $SCALAR_UDF_FN, $CTOR);
- }
};
($UDF:ident, $EXPR_FN:ident, $DOC:expr, $SCALAR_UDF_FN:ident) => {
make_udf_expr_and_func!($UDF, $EXPR_FN, $DOC, $SCALAR_UDF_FN,
$UDF::new);
};
($UDF:ident, $EXPR_FN:ident, $DOC:expr, $SCALAR_UDF_FN:ident, $CTOR:path)
=> {
- paste::paste! {
// "fluent expr_fn" style function
#[doc = $DOC]
pub fn $EXPR_FN(arg: Vec<datafusion_expr::Expr>) ->
datafusion_expr::Expr {
@@ -76,7 +73,6 @@ macro_rules! make_udf_expr_and_func {
))
}
create_func!($UDF, $SCALAR_UDF_FN, $CTOR);
- }
};
}
@@ -97,7 +93,6 @@ macro_rules! create_func {
create_func!($UDF, $SCALAR_UDF_FN, $UDF::new);
};
($UDF:ident, $SCALAR_UDF_FN:ident, $CTOR:path) => {
- paste::paste! {
#[doc = concat!("ScalarFunction that returns a
[`ScalarUDF`](datafusion_expr::ScalarUDF) for ")]
#[doc = stringify!($UDF)]
pub fn $SCALAR_UDF_FN() ->
std::sync::Arc<datafusion_expr::ScalarUDF> {
@@ -110,6 +105,5 @@ macro_rules! create_func {
});
std::sync::Arc::clone(&INSTANCE)
}
- }
};
}
diff --git a/datafusion/functions-table/Cargo.toml
b/datafusion/functions-table/Cargo.toml
index aa401fbd7d..4edb640cb2 100644
--- a/datafusion/functions-table/Cargo.toml
+++ b/datafusion/functions-table/Cargo.toml
@@ -48,7 +48,6 @@ datafusion-common = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-physical-plan = { workspace = true }
parking_lot = { workspace = true }
-paste = { workspace = true }
[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
diff --git a/datafusion/functions-table/src/lib.rs
b/datafusion/functions-table/src/lib.rs
index cd9ade041a..668e964901 100644
--- a/datafusion/functions-table/src/lib.rs
+++ b/datafusion/functions-table/src/lib.rs
@@ -38,25 +38,27 @@ pub fn all_default_table_functions() ->
Vec<Arc<TableFunction>> {
/// Creates a singleton instance of a table function
/// - `$module`: A struct implementing `TableFunctionImpl` to create the
function from
/// - `$name`: The name to give to the created function
-///
-/// This is used to ensure creating the list of `TableFunction` only happens
once.
+/// - `$func_name`: The identifier of the generated function that returns the singleton `TableFunction`
+/// This is used to ensure creating the list of `TableFunction` only happens
once.
#[macro_export]
macro_rules! create_udtf_function {
- ($module:path, $name:expr) => {
- paste::paste! {
- pub fn [<$name:lower>]() -> Arc<TableFunction> {
- static INSTANCE: std::sync::LazyLock<Arc<TableFunction>> =
- std::sync::LazyLock::new(|| {
- std::sync::Arc::new(TableFunction::new(
- $name.to_string(),
- Arc::new($module {}),
- ))
- });
- std::sync::Arc::clone(&INSTANCE)
- }
+ ($module:expr, $func_name:ident, $name:expr) => {
+ pub fn $func_name() -> Arc<TableFunction> {
+ static INSTANCE: std::sync::LazyLock<Arc<TableFunction>> =
+ std::sync::LazyLock::new(|| {
+ std::sync::Arc::new(TableFunction::new(
+ $name.to_string(),
+ Arc::new($module),
+ ))
+ });
+ std::sync::Arc::clone(&INSTANCE)
}
};
}
-create_udtf_function!(generate_series::GenerateSeriesFunc, "generate_series");
-create_udtf_function!(generate_series::RangeFunc, "range");
+create_udtf_function!(
+ generate_series::GenerateSeriesFunc {},
+ generate_series,
+ "generate_series"
+);
+create_udtf_function!(generate_series::RangeFunc {}, range, "range");
diff --git a/datafusion/functions-window/Cargo.toml
b/datafusion/functions-window/Cargo.toml
index fae71e180e..9c4342adae 100644
--- a/datafusion/functions-window/Cargo.toml
+++ b/datafusion/functions-window/Cargo.toml
@@ -50,7 +50,6 @@ datafusion-macros = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-expr-common = { workspace = true }
log = { workspace = true }
-paste = { workspace = true }
[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
diff --git a/datafusion/functions-window/src/cume_dist.rs
b/datafusion/functions-window/src/cume_dist.rs
index dccb9148d2..8e1cb1b1e6 100644
--- a/datafusion/functions-window/src/cume_dist.rs
+++ b/datafusion/functions-window/src/cume_dist.rs
@@ -39,6 +39,7 @@ use std::sync::Arc;
define_udwf_and_expr!(
CumeDist,
cume_dist,
+ cume_dist_udwf,
"Calculates the cumulative distribution of a value in a group of values."
);
diff --git a/datafusion/functions-window/src/lead_lag.rs
b/datafusion/functions-window/src/lead_lag.rs
index 7569dac9ac..fbb84483e2 100644
--- a/datafusion/functions-window/src/lead_lag.rs
+++ b/datafusion/functions-window/src/lead_lag.rs
@@ -43,6 +43,7 @@ use std::sync::{Arc, LazyLock};
get_or_init_udwf!(
Lag,
lag,
+ lag_udwf,
"Returns the row value that precedes the current row by a specified \
offset within partition. If no such row exists, then returns the \
default value.",
@@ -51,6 +52,7 @@ get_or_init_udwf!(
get_or_init_udwf!(
Lead,
lead,
+ lead_udwf,
"Returns the value from a row that follows the current row by a \
specified offset within the partition. If no such row exists, then \
returns the default value.",
diff --git a/datafusion/functions-window/src/macros.rs
b/datafusion/functions-window/src/macros.rs
index 890ced90a9..aeb54356f8 100644
--- a/datafusion/functions-window/src/macros.rs
+++ b/datafusion/functions-window/src/macros.rs
@@ -30,8 +30,8 @@
///
/// * `$UDWF`: The struct which defines the
[`Signature`](datafusion_expr::Signature)
/// of the user-defined window function.
-/// * `$OUT_FN_NAME`: The basename to generate a unique function name like
-/// `$OUT_FN_NAME_udwf`.
+/// * `$OUT_FN_NAME`: The expression function name
+/// * `$UDWF_FN`: The unique name of the generated function that returns the `WindowUDF`, e.g. `simple_udwf`.
/// * `$DOC`: Doc comments for UDWF.
/// * (optional) `$CTOR`: Pass a custom constructor. When omitted it
/// automatically resolves to `$UDWF::default()`.
@@ -52,6 +52,7 @@
/// get_or_init_udwf!(
/// SimpleUDWF,
/// simple,
+/// simple_udwf,
/// "Simple user-defined window function doc comment."
/// );
/// #
@@ -94,16 +95,15 @@
/// ```
#[macro_export]
macro_rules! get_or_init_udwf {
- ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => {
- get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $UDWF::default);
+ ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr) => {
+ get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC, $UDWF::default);
};
- ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => {
- paste::paste! {
+ ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr, $CTOR:path)
=> {
#[doc = concat!(" Returns a
[`WindowUDF`](datafusion_expr::WindowUDF) for [`", stringify!($OUT_FN_NAME),
"`].")]
#[doc = ""]
#[doc = concat!(" ", $DOC)]
- pub fn [<$OUT_FN_NAME _udwf>]() ->
std::sync::Arc<datafusion_expr::WindowUDF> {
+ pub fn $UDWF_FN() -> std::sync::Arc<datafusion_expr::WindowUDF> {
// Singleton instance of UDWF, ensures it is only created once.
static INSTANCE:
std::sync::LazyLock<std::sync::Arc<datafusion_expr::WindowUDF>> =
std::sync::LazyLock::new(|| {
@@ -111,7 +111,6 @@ macro_rules! get_or_init_udwf {
});
std::sync::Arc::clone(&INSTANCE)
}
- }
};
}
@@ -149,6 +148,7 @@ macro_rules! get_or_init_udwf {
/// # get_or_init_udwf!(
/// # RowNumber,
/// # row_number,
+/// # row_number_udwf,
/// # "Returns a unique row number for each row in window partition
beginning at 1."
/// # );
/// /// Creates `row_number()` API which has zero parameters:
@@ -163,6 +163,7 @@ macro_rules! get_or_init_udwf {
/// create_udwf_expr!(
/// RowNumber,
/// row_number,
+/// row_number_udwf,
/// "Returns a unique row number for each row in window partition
beginning at 1."
/// );
/// #
@@ -221,7 +222,7 @@ macro_rules! get_or_init_udwf {
/// # use datafusion_expr::{col, lit};
/// # use
datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
/// #
-/// # get_or_init_udwf!(Lead, lead, "user-defined window function");
+/// # get_or_init_udwf!(Lead, lead, lead_udwf, "user-defined window function");
/// #
/// /// Creates `lead(expr, offset, default)` with 3 parameters:
/// ///
@@ -240,6 +241,7 @@ macro_rules! get_or_init_udwf {
/// Lead,
/// lead,
/// [expr, offset, default],
+/// lead_udwf,
/// "Returns a value evaluated at the row that is offset rows after the
current row within the partition."
/// );
/// #
@@ -298,21 +300,18 @@ macro_rules! get_or_init_udwf {
#[macro_export]
macro_rules! create_udwf_expr {
// zero arguments
- ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => {
- paste::paste! {
+ ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr) => {
#[doc = " Create a
[`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"]
#[doc = concat!(" `", stringify!($UDWF), "` user-defined window
function.")]
#[doc = ""]
#[doc = concat!(" ", $DOC)]
pub fn $OUT_FN_NAME() -> datafusion_expr::Expr {
- [<$OUT_FN_NAME _udwf>]().call(vec![])
+ $UDWF_FN().call(vec![])
}
- }
};
// 1 or more arguments
- ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => {
- paste::paste! {
+ ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $UDWF_FN:ident,
$DOC:expr) => {
#[doc = " Create a
[`WindowFunction`](datafusion_expr::Expr::WindowFunction) expression for"]
#[doc = concat!(" `", stringify!($UDWF), "` user-defined window
function.")]
#[doc = ""]
@@ -320,10 +319,9 @@ macro_rules! create_udwf_expr {
pub fn $OUT_FN_NAME(
$($PARAM: datafusion_expr::Expr),+
) -> datafusion_expr::Expr {
- [<$OUT_FN_NAME _udwf>]()
+ $UDWF_FN()
.call(vec![$($PARAM),+])
}
- }
};
}
@@ -374,6 +372,7 @@ macro_rules! create_udwf_expr {
/// define_udwf_and_expr!(
/// SimpleUDWF,
/// simple,
+/// simple_udwf,
/// "a simple user-defined window function"
/// );
/// #
@@ -437,6 +436,7 @@ macro_rules! create_udwf_expr {
/// define_udwf_and_expr!(
/// RowNumber,
/// row_number,
+/// row_number_udwf,
/// "Returns a unique row number for each row in window partition
beginning at 1.",
/// RowNumber::new // <-- custom constructor
/// );
@@ -514,6 +514,7 @@ macro_rules! create_udwf_expr {
/// Lead,
/// lead,
/// [expr, offset, default], // <- 3 parameters
+/// lead_udwf,
/// "user-defined window function"
/// );
/// #
@@ -603,6 +604,7 @@ macro_rules! create_udwf_expr {
/// Lead,
/// lead,
/// [expr, offset, default], // <- 3 parameters
+/// lead_udwf,
/// "user-defined window function",
/// Lead::new // <- Custom constructor
/// );
@@ -663,29 +665,29 @@ macro_rules! create_udwf_expr {
macro_rules! define_udwf_and_expr {
// Defines UDWF with default constructor
// Defines expression API with zero parameters
- ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr) => {
- get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC);
- create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC);
+ ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr) => {
+ get_or_init_udwf!($UDWF, $OUT_FN_NAME,$UDWF_FN, $DOC);
+ create_udwf_expr!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC);
};
// Defines UDWF by passing a custom constructor
// Defines expression API with zero parameters
- ($UDWF:ident, $OUT_FN_NAME:ident, $DOC:expr, $CTOR:path) => {
- get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR);
- create_udwf_expr!($UDWF, $OUT_FN_NAME, $DOC);
+ ($UDWF:ident, $OUT_FN_NAME:ident, $UDWF_FN:ident, $DOC:expr, $CTOR:path)
=> {
+ get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC, $CTOR);
+ create_udwf_expr!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC);
};
// Defines UDWF with default constructor
// Defines expression API with multiple parameters
- ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr) => {
- get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC);
- create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC);
+ ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+],$UDWF_FN:ident,
$DOC:expr) => {
+ get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC);
+ create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $UDWF_FN, $DOC);
};
// Defines UDWF by passing a custom constructor
// Defines expression API with multiple parameters
- ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $DOC:expr,
$CTOR:path) => {
- get_or_init_udwf!($UDWF, $OUT_FN_NAME, $DOC, $CTOR);
- create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $DOC);
+ ($UDWF:ident, $OUT_FN_NAME:ident, [$($PARAM:ident),+], $UDWF_FN:ident,
$DOC:expr, $CTOR:path) => {
+ get_or_init_udwf!($UDWF, $OUT_FN_NAME, $UDWF_FN, $DOC, $CTOR);
+ create_udwf_expr!($UDWF, $OUT_FN_NAME, [$($PARAM),+], $UDWF_FN, $DOC);
};
}
diff --git a/datafusion/functions-window/src/nth_value.rs
b/datafusion/functions-window/src/nth_value.rs
index 8d37cf7e60..12b4146cc2 100644
--- a/datafusion/functions-window/src/nth_value.rs
+++ b/datafusion/functions-window/src/nth_value.rs
@@ -45,6 +45,7 @@ define_udwf_and_expr!(
First,
first_value,
[arg],
+ first_value_udwf,
"Returns the first value in the window frame",
NthValue::first
);
@@ -52,12 +53,14 @@ define_udwf_and_expr!(
Last,
last_value,
[arg],
+ last_value_udwf,
"Returns the last value in the window frame",
NthValue::last
);
get_or_init_udwf!(
NthValue,
nth_value,
+ nth_value_udwf,
"Returns the nth value in the window frame",
NthValue::nth
);
diff --git a/datafusion/functions-window/src/ntile.rs
b/datafusion/functions-window/src/ntile.rs
index 21ce2795b4..1f9b2344e5 100644
--- a/datafusion/functions-window/src/ntile.rs
+++ b/datafusion/functions-window/src/ntile.rs
@@ -40,6 +40,7 @@ define_udwf_and_expr!(
Ntile,
ntile,
[arg],
+ ntile_udwf,
"Integer ranging from 1 to the argument value, dividing the partition as
equally as possible."
);
diff --git a/datafusion/functions-window/src/rank.rs
b/datafusion/functions-window/src/rank.rs
index 9d5af64eb9..ee8546703b 100644
--- a/datafusion/functions-window/src/rank.rs
+++ b/datafusion/functions-window/src/rank.rs
@@ -44,6 +44,7 @@ use std::sync::{Arc, LazyLock};
define_udwf_and_expr!(
Rank,
rank,
+ rank_udwf,
"Returns rank of the current row with gaps. Same as `row_number` of its
first peer",
Rank::basic
);
@@ -51,6 +52,7 @@ define_udwf_and_expr!(
define_udwf_and_expr!(
DenseRank,
dense_rank,
+ dense_rank_udwf,
"Returns rank of the current row without gaps. This function counts peer
groups",
Rank::dense_rank
);
@@ -58,6 +60,7 @@ define_udwf_and_expr!(
define_udwf_and_expr!(
PercentRank,
percent_rank,
+ percent_rank_udwf,
"Returns the relative rank of the current row: (rank - 1) / (total rows -
1)",
Rank::percent_rank
);
diff --git a/datafusion/functions-window/src/row_number.rs
b/datafusion/functions-window/src/row_number.rs
index d7d298cece..cd60e51def 100644
--- a/datafusion/functions-window/src/row_number.rs
+++ b/datafusion/functions-window/src/row_number.rs
@@ -40,6 +40,7 @@ use std::sync::Arc;
define_udwf_and_expr!(
RowNumber,
row_number,
+ row_number_udwf,
"Returns a unique row number for each row in window partition beginning at
1."
);
diff --git a/datafusion/physical-expr/Cargo.toml
b/datafusion/physical-expr/Cargo.toml
index 7a52441477..d6cb212737 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -65,7 +65,6 @@ arrow = { workspace = true, features = ["test_utils"] }
criterion = { workspace = true }
datafusion-functions = { workspace = true }
insta = { workspace = true }
-paste = { workspace = true }
rand = { workspace = true }
rstest = { workspace = true }
diff --git a/datafusion/physical-expr/src/expressions/negative.rs
b/datafusion/physical-expr/src/expressions/negative.rs
index c727c8fa5f..c78bbe999e 100644
--- a/datafusion/physical-expr/src/expressions/negative.rs
+++ b/datafusion/physical-expr/src/expressions/negative.rs
@@ -214,10 +214,9 @@ mod tests {
use datafusion_common::{DataFusionError, ScalarValue};
use datafusion_physical_expr_common::physical_expr::fmt_sql;
- use paste::paste;
macro_rules! test_array_negative_op {
- ($DATA_TY:tt, $($VALUE:expr),* ) => {
+ ($DATA_TY:tt, $ARRAY_TY:ty, $($VALUE:expr),* ) => {
let schema = Schema::new(vec![Field::new("a", DataType::$DATA_TY,
true)]);
let expr = negative(col("a", &schema)?, &schema)?;
assert_eq!(expr.data_type(&schema)?, DataType::$DATA_TY);
@@ -230,8 +229,8 @@ mod tests {
)+
arr.push(None);
arr_expected.push(None);
- let input = paste!{[<$DATA_TY Array>]::from(arr)};
- let expected = &paste!{[<$DATA_TY Array>]::from(arr_expected)};
+ let input = <$ARRAY_TY>::from(arr);
+ let expected = &<$ARRAY_TY>::from(arr_expected);
let batch =
RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(input)])?;
let result =
expr.evaluate(&batch)?.into_array(batch.num_rows()).expect("Failed to convert to array");
@@ -243,12 +242,12 @@ mod tests {
#[test]
fn array_negative_op() -> Result<()> {
- test_array_negative_op!(Int8, 2i8, 1i8);
- test_array_negative_op!(Int16, 234i16, 123i16);
- test_array_negative_op!(Int32, 2345i32, 1234i32);
- test_array_negative_op!(Int64, 23456i64, 12345i64);
- test_array_negative_op!(Float32, 2345.0f32, 1234.0f32);
- test_array_negative_op!(Float64, 23456.0f64, 12345.0f64);
+ test_array_negative_op!(Int8, Int8Array, 2i8, 1i8);
+ test_array_negative_op!(Int16, Int16Array, 234i16, 123i16);
+ test_array_negative_op!(Int32, Int32Array, 2345i32, 1234i32);
+ test_array_negative_op!(Int64, Int64Array, 23456i64, 12345i64);
+ test_array_negative_op!(Float32, Float32Array, 2345.0f32, 1234.0f32);
+ test_array_negative_op!(Float64, Float64Array, 23456.0f64, 12345.0f64);
Ok(())
}
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
index b7338cb764..cc299ce507 100644
--- a/datafusion/sql/Cargo.toml
+++ b/datafusion/sql/Cargo.toml
@@ -73,5 +73,4 @@ datafusion-functions-window = { workspace = true }
env_logger = { workspace = true }
insta = { workspace = true }
itertools = { workspace = true }
-paste = { workspace = true }
rstest = { workspace = true }
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index cd42b68949..79d2bd6ad8 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -1333,46 +1333,42 @@ mod tests {
}
macro_rules! test_stack_overflow {
- ($num_expr:expr) => {
- paste::item! {
- #[test]
- fn [<test_stack_overflow_ $num_expr>]() {
- let schema = DFSchema::empty();
- let mut planner_context = PlannerContext::default();
-
- let expr_str = (0..$num_expr)
- .map(|i| format!("column1 = 'value{:?}'", i))
- .collect::<Vec<String>>()
- .join(" OR ");
-
- let dialect = GenericDialect{};
- let mut parser = Parser::new(&dialect)
- .try_with_sql(expr_str.as_str())
- .unwrap();
- let sql_expr = parser.parse_expr().unwrap();
-
- let context_provider = TestContextProvider::new();
- let sql_to_rel = SqlToRel::new(&context_provider);
-
- // Should not stack overflow
- sql_to_rel.sql_expr_to_logical_expr(
- sql_expr,
- &schema,
- &mut planner_context,
- ).unwrap();
- }
+ ($name:ident, $num_expr:expr) => {
+ #[test]
+ fn $name() {
+ let schema = DFSchema::empty();
+ let mut planner_context = PlannerContext::default();
+
+ let expr_str = (0..$num_expr)
+ .map(|i| format!("column1 = 'value{:?}'", i))
+ .collect::<Vec<String>>()
+ .join(" OR ");
+
+ let dialect = GenericDialect {};
+ let mut parser = Parser::new(&dialect)
+ .try_with_sql(expr_str.as_str())
+ .unwrap();
+ let sql_expr = parser.parse_expr().unwrap();
+
+ let context_provider = TestContextProvider::new();
+ let sql_to_rel = SqlToRel::new(&context_provider);
+
+ // Should not stack overflow
+ sql_to_rel
+ .sql_expr_to_logical_expr(sql_expr, &schema, &mut planner_context)
+ .unwrap();
}
};
}
- test_stack_overflow!(64);
- test_stack_overflow!(128);
- test_stack_overflow!(256);
- test_stack_overflow!(512);
- test_stack_overflow!(1024);
- test_stack_overflow!(2048);
- test_stack_overflow!(4096);
- test_stack_overflow!(8192);
+ test_stack_overflow!(test_stack_overflow_64, 64);
+ test_stack_overflow!(test_stack_overflow_128, 128);
+ test_stack_overflow!(test_stack_overflow_256, 256);
+ test_stack_overflow!(test_stack_overflow_512, 512);
+ test_stack_overflow!(test_stack_overflow_1024, 1024);
+ test_stack_overflow!(test_stack_overflow_2048, 2048);
+ test_stack_overflow!(test_stack_overflow_4096, 4096);
+ test_stack_overflow!(test_stack_overflow_8192, 8192);
#[test]
fn test_sql_to_expr_with_alias() {
let schema = DFSchema::empty();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]