This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 69ba82f03e Fix: dataframe_subquery example Optimizer rule `common_sub_expression_eliminate` failed (#8016)
69ba82f03e is described below
commit 69ba82f03e3fa76a18a712fa228402f6d9324879
Author: jokercurry <[email protected]>
AuthorDate: Wed Nov 1 19:16:50 2023 +0800
Fix: dataframe_subquery example Optimizer rule `common_sub_expression_eliminate` failed (#8016)
* Fix: Optimizer rule 'common_sub_expression_eliminate' failed
* nit
* nit
* nit
---------
Co-authored-by: zhongjingxiong <[email protected]>
---
datafusion-examples/examples/dataframe_subquery.rs | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/datafusion-examples/examples/dataframe_subquery.rs b/datafusion-examples/examples/dataframe_subquery.rs
index 94049e59b3..9fb61008b9 100644
--- a/datafusion-examples/examples/dataframe_subquery.rs
+++ b/datafusion-examples/examples/dataframe_subquery.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use arrow_schema::DataType;
use std::sync::Arc;
use datafusion::error::Result;
@@ -38,7 +39,7 @@ async fn main() -> Result<()> {
Ok(())
}
-//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 limit 10;
+//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 limit 3;
async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> {
ctx.table("t1")
.await?
@@ -46,7 +47,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> {
scalar_subquery(Arc::new(
ctx.table("t2")
.await?
- .filter(col("t1.c1").eq(col("t2.c1")))?
+ .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))?
.aggregate(vec![], vec![avg(col("t2.c2"))])?
.select(vec![avg(col("t2.c2"))])?
.into_unoptimized_plan(),
@@ -60,7 +61,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> {
Ok(())
}
-//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where t2.c1 > 0 ) limit 10
+//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where t2.c1 > 0 ) limit 3;
async fn where_in_subquery(ctx: &SessionContext) -> Result<()> {
ctx.table("t1")
.await?
@@ -82,14 +83,14 @@ async fn where_in_subquery(ctx: &SessionContext) -> Result<()> {
Ok(())
}
-//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = t2.c1) limit 10
+//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = t2.c1) limit 3;
async fn where_exist_subquery(ctx: &SessionContext) -> Result<()> {
ctx.table("t1")
.await?
.filter(exists(Arc::new(
ctx.table("t2")
.await?
- .filter(col("t1.c1").eq(col("t2.c1")))?
+ .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))?
.select(vec![col("t2.c2")])?
.into_unoptimized_plan(),
)))?