FYI - I was running my tests on the v1.26 release. When I ran the same test
on master, it seems the query does get decorrelated. Have there been some
recent bugs or bugfixes in that area?

with customer_total_return as
(select sr_customer_sk as ctr_customer_sk
,sr_store_sk as ctr_store_sk
,sum(sr_return_amt) as ctr_total_return
from store_returns
,date_dim
where sr_returned_date_sk = d_date_sk
and d_year = 2000
group by sr_customer_sk
,sr_store_sk)
 select  c_customer_id
from customer_total_return ctr1
,store
,customer
where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
from customer_total_return ctr2
where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
and s_store_sk = ctr1.ctr_store_sk
and s_state = 'TN'
and ctr1.ctr_customer_sk = c_customer_sk
order by c_customer_id
limit 100


*[Initial Logical Plan]*LogicalSort(sort0=[$0], dir0=[ASC], fetch=[100])
  LogicalProject(C_CUSTOMER_ID=[$33])
    LogicalFilter(condition=[AND(>($2, $50), =($3, $1), =($27, 'TN'), =($0,
$32))])
      LogicalCorrelate(correlation=[$cor0], joinType=[left],
requiredColumns=[{1}])
        LogicalJoin(condition=[true], joinType=[inner])
          LogicalJoin(condition=[true], joinType=[inner])
            LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2])
              LogicalAggregate(group=[{0, 1}], CTR_TOTAL_RETURN=[SUM($2)])
                LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
                  LogicalFilter(condition=[AND(=($0, $20), =($26, 2000))])
                    LogicalJoin(condition=[true], joinType=[inner])
                      LogicalTableScan(table=[[store_returns]])
                      LogicalTableScan(table=[[date_dim]])
            LogicalTableScan(table=[[store]])
          LogicalTableScan(table=[[customer]])
        LogicalProject(EXPR$0=[*($0, 1.2:DECIMAL(2, 1))])
          LogicalAggregate(group=[{}], agg#0=[AVG($0)])
            LogicalProject(CTR_TOTAL_RETURN=[$2])
              LogicalFilter(condition=[=($cor0.CTR_STORE_SK, $1)])
                LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2])
                  LogicalAggregate(group=[{0, 1}],
CTR_TOTAL_RETURN=[SUM($2)])
                    LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
                      LogicalFilter(condition=[AND(=($0, $20), =($26,
2000))])
                        LogicalJoin(condition=[true], joinType=[inner])
                          LogicalTableScan(table=[[store_returns]])
                          LogicalTableScan(table=[[date_dim]])


*[Decorrelated Logical Plan]*LogicalSort(sort0=[$0], dir0=[ASC],
fetch=[100])
  LogicalProject(C_CUSTOMER_ID=[$33])
    LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2], s_store_sk=[$3], s_store_id=[$4],
s_rec_start_date=[$5], s_rec_end_date=[$6], s_closed_date_sk=[$7],
s_store_name=[$8], s_number_employees=[$9], s_floor_space=[$10],
s_hours=[$11], s_manager=[$12], s_market_id=[$13], s_geography_class=[$14],
s_market_desc=[$15], s_market_manager=[$16], s_division_id=[$17],
s_division_name=[$18], s_company_id=[$19], s_company_name=[$20],
s_street_number=[$21], s_street_name=[$22], s_street_type=[$23],
s_suite_number=[$24], s_city=[$25], s_county=[$26], s_state=[$27],
s_zip=[$28], s_country=[$29], s_gmt_offset=[$30], s_tax_precentage=[$31],
c_customer_sk=[$32], c_customer_id=[$33], c_current_cdemo_sk=[$34],
c_current_hdemo_sk=[$35], c_current_addr_sk=[$36],
c_first_shipto_date_sk=[$37], c_first_sales_date_sk=[$38],
c_salutation=[$39], c_first_name=[$40], c_last_name=[$41],
c_preferred_cust_flag=[$42], c_birth_day=[$43], c_birth_month=[$44],
c_birth_year=[$45], c_birth_country=[$46], c_login=[$47],
c_email_address=[$48], c_last_review_date_sk=[$49],
CTR_STORE_SK0=[CAST($50):BIGINT], $f1=[CAST($51):DOUBLE])
      LogicalJoin(condition=[AND(=($1, $50), >($2, *($51, 1.2:DECIMAL(2,
1))))], joinType=[inner])
        LogicalJoin(condition=[=($0, $32)], joinType=[inner])
          LogicalJoin(condition=[=($3, $1)], joinType=[inner])
            LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2])
              LogicalAggregate(group=[{0, 1}], CTR_TOTAL_RETURN=[SUM($2)])
                LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
                  LogicalJoin(condition=[=($0, $20)], joinType=[inner])
                    LogicalTableScan(table=[[store_returns]])
                    LogicalFilter(condition=[=($6, 2000)])
                      LogicalTableScan(table=[[date_dim]])
            LogicalFilter(condition=[=($24, 'TN')])
              LogicalTableScan(table=[[store]])
          LogicalTableScan(table=[[customer]])
        LogicalAggregate(group=[{0}], agg#0=[AVG($1)])
          LogicalProject(CTR_STORE_SK=[$1], CTR_TOTAL_RETURN=[$2])
            LogicalAggregate(group=[{0, 1}], CTR_TOTAL_RETURN=[SUM($2)])
              LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
                LogicalJoin(condition=[=($0, $20)], joinType=[inner])
                  LogicalTableScan(table=[[store_returns]])
                  LogicalFilter(condition=[=($6, 2000)])
                    LogicalTableScan(table=[[date_dim]])

Reply via email to