FYI - I was running my tests on the v1.26 release. When I ran the same test
on master, it seems the query does get decorrelated. Have there been some
recent bugs or bugfixes in that area?
with customer_total_return as
(select sr_customer_sk as ctr_customer_sk
,sr_store_sk as ctr_store_sk
,sum(sr_return_amt) as ctr_total_return
from store_returns
,date_dim
where sr_returned_date_sk = d_date_sk
and d_year = 2000
group by sr_customer_sk
,sr_store_sk)
select c_customer_id
from customer_total_return ctr1
,store
,customer
where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
from customer_total_return ctr2
where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
and s_store_sk = ctr1.ctr_store_sk
and s_state = 'TN'
and ctr1.ctr_customer_sk = c_customer_sk
order by c_customer_id
limit 100
*[Initial Logical Plan]*LogicalSort(sort0=[$0], dir0=[ASC], fetch=[100])
LogicalProject(C_CUSTOMER_ID=[$33])
LogicalFilter(condition=[AND(>($2, $50), =($3, $1), =($27, 'TN'), =($0,
$32))])
LogicalCorrelate(correlation=[$cor0], joinType=[left],
requiredColumns=[{1}])
LogicalJoin(condition=[true], joinType=[inner])
LogicalJoin(condition=[true], joinType=[inner])
LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2])
LogicalAggregate(group=[{0, 1}], CTR_TOTAL_RETURN=[SUM($2)])
LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
LogicalFilter(condition=[AND(=($0, $20), =($26, 2000))])
LogicalJoin(condition=[true], joinType=[inner])
LogicalTableScan(table=[[store_returns]])
LogicalTableScan(table=[[date_dim]])
LogicalTableScan(table=[[store]])
LogicalTableScan(table=[[customer]])
LogicalProject(EXPR$0=[*($0, 1.2:DECIMAL(2, 1))])
LogicalAggregate(group=[{}], agg#0=[AVG($0)])
LogicalProject(CTR_TOTAL_RETURN=[$2])
LogicalFilter(condition=[=($cor0.CTR_STORE_SK, $1)])
LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2])
LogicalAggregate(group=[{0, 1}],
CTR_TOTAL_RETURN=[SUM($2)])
LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
LogicalFilter(condition=[AND(=($0, $20), =($26,
2000))])
LogicalJoin(condition=[true], joinType=[inner])
LogicalTableScan(table=[[store_returns]])
LogicalTableScan(table=[[date_dim]])
*[Decorrelated Logical Plan]*LogicalSort(sort0=[$0], dir0=[ASC],
fetch=[100])
LogicalProject(C_CUSTOMER_ID=[$33])
LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2], s_store_sk=[$3], s_store_id=[$4],
s_rec_start_date=[$5], s_rec_end_date=[$6], s_closed_date_sk=[$7],
s_store_name=[$8], s_number_employees=[$9], s_floor_space=[$10],
s_hours=[$11], s_manager=[$12], s_market_id=[$13], s_geography_class=[$14],
s_market_desc=[$15], s_market_manager=[$16], s_division_id=[$17],
s_division_name=[$18], s_company_id=[$19], s_company_name=[$20],
s_street_number=[$21], s_street_name=[$22], s_street_type=[$23],
s_suite_number=[$24], s_city=[$25], s_county=[$26], s_state=[$27],
s_zip=[$28], s_country=[$29], s_gmt_offset=[$30], s_tax_precentage=[$31],
c_customer_sk=[$32], c_customer_id=[$33], c_current_cdemo_sk=[$34],
c_current_hdemo_sk=[$35], c_current_addr_sk=[$36],
c_first_shipto_date_sk=[$37], c_first_sales_date_sk=[$38],
c_salutation=[$39], c_first_name=[$40], c_last_name=[$41],
c_preferred_cust_flag=[$42], c_birth_day=[$43], c_birth_month=[$44],
c_birth_year=[$45], c_birth_country=[$46], c_login=[$47],
c_email_address=[$48], c_last_review_date_sk=[$49],
CTR_STORE_SK0=[CAST($50):BIGINT], $f1=[CAST($51):DOUBLE])
LogicalJoin(condition=[AND(=($1, $50), >($2, *($51, 1.2:DECIMAL(2,
1))))], joinType=[inner])
LogicalJoin(condition=[=($0, $32)], joinType=[inner])
LogicalJoin(condition=[=($3, $1)], joinType=[inner])
LogicalProject(CTR_CUSTOMER_SK=[$0], CTR_STORE_SK=[$1],
CTR_TOTAL_RETURN=[$2])
LogicalAggregate(group=[{0, 1}], CTR_TOTAL_RETURN=[SUM($2)])
LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
LogicalJoin(condition=[=($0, $20)], joinType=[inner])
LogicalTableScan(table=[[store_returns]])
LogicalFilter(condition=[=($6, 2000)])
LogicalTableScan(table=[[date_dim]])
LogicalFilter(condition=[=($24, 'TN')])
LogicalTableScan(table=[[store]])
LogicalTableScan(table=[[customer]])
LogicalAggregate(group=[{0}], agg#0=[AVG($1)])
LogicalProject(CTR_STORE_SK=[$1], CTR_TOTAL_RETURN=[$2])
LogicalAggregate(group=[{0, 1}], CTR_TOTAL_RETURN=[SUM($2)])
LogicalProject(sr_customer_sk=[$3], sr_store_sk=[$7],
sr_return_amt=[$11])
LogicalJoin(condition=[=($0, $20)], joinType=[inner])
LogicalTableScan(table=[[store_returns]])
LogicalFilter(condition=[=($6, 2000)])
LogicalTableScan(table=[[date_dim]])