This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 69ba82f03e Fix: dataframe_subquery example Optimizer rule 
`common_sub_expression_eliminate` failed (#8016)
69ba82f03e is described below

commit 69ba82f03e3fa76a18a712fa228402f6d9324879
Author: jokercurry <[email protected]>
AuthorDate: Wed Nov 1 19:16:50 2023 +0800

    Fix: dataframe_subquery example Optimizer rule 
`common_sub_expression_eliminate` failed (#8016)
    
    * Fix: Optimizer rule 'common_sub_expression_eliminate' failed
    
    * nit
    
    * nit
    
    * nit
    
    ---------
    
    Co-authored-by: zhongjingxiong <[email protected]>
---
 datafusion-examples/examples/dataframe_subquery.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/datafusion-examples/examples/dataframe_subquery.rs 
b/datafusion-examples/examples/dataframe_subquery.rs
index 94049e59b3..9fb61008b9 100644
--- a/datafusion-examples/examples/dataframe_subquery.rs
+++ b/datafusion-examples/examples/dataframe_subquery.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use arrow_schema::DataType;
 use std::sync::Arc;
 
 use datafusion::error::Result;
@@ -38,7 +39,7 @@ async fn main() -> Result<()> {
     Ok(())
 }
 
-//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 
limit 10;
+//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 
limit 3;
 async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> {
     ctx.table("t1")
         .await?
@@ -46,7 +47,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> 
Result<()> {
             scalar_subquery(Arc::new(
                 ctx.table("t2")
                     .await?
-                    .filter(col("t1.c1").eq(col("t2.c1")))?
+                    .filter(out_ref_col(DataType::Utf8, 
"t1.c1").eq(col("t2.c1")))?
                     .aggregate(vec![], vec![avg(col("t2.c2"))])?
                     .select(vec![avg(col("t2.c2"))])?
                     .into_unoptimized_plan(),
@@ -60,7 +61,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> 
Result<()> {
     Ok(())
 }
 
-//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where 
t2.c1 > 0 ) limit 10
+//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where 
t2.c1 > 0 ) limit 3;
 async fn where_in_subquery(ctx: &SessionContext) -> Result<()> {
     ctx.table("t1")
         .await?
@@ -82,14 +83,14 @@ async fn where_in_subquery(ctx: &SessionContext) -> 
Result<()> {
     Ok(())
 }
 
-//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = 
t2.c1) limit 10
+//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = 
t2.c1) limit 3;
 async fn where_exist_subquery(ctx: &SessionContext) -> Result<()> {
     ctx.table("t1")
         .await?
         .filter(exists(Arc::new(
             ctx.table("t2")
                 .await?
-                .filter(col("t1.c1").eq(col("t2.c1")))?
+                .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))?
                 .select(vec![col("t2.c2")])?
                 .into_unoptimized_plan(),
         )))?

Reply via email to