Julian Hyde created CALCITE-5540:
------------------------------------

             Summary: Wrong plan for query with COUNT(DISTINCT subQuery)
                 Key: CALCITE-5540
                 URL: https://issues.apache.org/jira/browse/CALCITE-5540
             Project: Calcite
          Issue Type: Bug
            Reporter: Julian Hyde


A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates 
an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.

The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the 
error is discussed in CALCITE-5537.

I think output is wrong. And I prefer to set expansion to true.
One edge of LatticeSpace graph is missed when the subquery is not expanded.
The missing case is as follow, the relationship of warehouse#warehouse_class_id 
and warehouse_class#warehouse_class_id is missing.
I think it should add to the graph.

foodmart query_id:6739
{code:java}
  select
  "store"."store_type" as "c0",
  count(
    distinct (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = 
"warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Owned'
    )
  ) as "m0",
  count(
    distinct (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = 
"warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Independent'
    )
  ) as "m1",
  count(
    (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = 
"warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Independent'
    )
  ) as "m2",
  count(distinct "store_id" + "warehouse_id") as "m3",
  count("store_id" + "warehouse_id") as "m4",
  count("warehouse"."stores_id") as "m5"
from
  "store" as "store",
  "warehouse" as "warehouse"
where
  "warehouse"."stores_id" = "store"."store_id"
group by
  "store"."store_type"{code}
when set SqlToRelConverter.Config.expand = true
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], 
m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
  LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], 
stores_id=[$26])
    LogicalJoin(condition=[=($25, $41)], joinType=[left])
      LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], 
store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], 
store_state=[$7], store_postal_code=[$8], store_country=[$9], 
store_manager=[$10], store_phone=[$11], store_fax=[$12], 
first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], 
grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], 
video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], 
warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], 
warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], 
wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], 
warehouse_postal_code=[$34], warehouse_country=[$35], 
warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], 
$f0=[$39], $f040=[$41])
        LogicalJoin(condition=[=($25, $40)], joinType=[left])
          LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], 
store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], 
store_state=[$7], store_postal_code=[$8], store_country=[$9], 
store_manager=[$10], store_phone=[$11], store_fax=[$12], 
first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], 
grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], 
video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], 
warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], 
warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], 
wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], 
warehouse_postal_code=[$34], warehouse_country=[$35], 
warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], 
$f0=[$40])
            LogicalJoin(condition=[=($25, $39)], joinType=[left])
              LogicalJoin(condition=[=($26, $0)], joinType=[inner])
                JdbcTableScan(table=[[foodmart, store]])
                JdbcTableScan(table=[[foodmart, warehouse]])
              LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
                LogicalProject(warehouse_class_id1=[$0], 
warehouse_class_id=[$0])
                  LogicalFilter(condition=[=($1, 'Large Owned')])
                    JdbcTableScan(table=[[foodmart, warehouse_class]])
          LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
            LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
              LogicalFilter(condition=[=($1, 'Large Independent')])
                JdbcTableScan(table=[[foodmart, warehouse_class]])
      LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
        LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
          LogicalFilter(condition=[=($1, 'Large Independent')])
            JdbcTableScan(table=[[foodmart, warehouse_class]]){code}

when set SqlToRelConverter.Config.expand = false
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], 
m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
  LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large 
Owned'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f2=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large 
Independent'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f3=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large 
Independent'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f4=[+($0, $24)], stores_id=[$26])
    LogicalFilter(condition=[=($26, $0)])
      LogicalJoin(condition=[true], joinType=[inner])
        JdbcTableScan(table=[[foodmart, store]])
        JdbcTableScan(table=[[foodmart, warehouse]]){code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to