[ 
https://issues.apache.org/jira/browse/ASTERIXDB-1332?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15180625#comment-15180625
 ] 

Yingyi Bu commented on ASTERIXDB-1332:
--------------------------------------

For this specific query, it might be better to join NestedOrder at the very end 
with the rest so that join between lineitem and orders can be saved, which 
leverages the benefit of nesting.

> Query fails in rewrite phase with stack overflow
> ------------------------------------------------
>
>                 Key: ASTERIXDB-1332
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1332
>             Project: Apache AsterixDB
>          Issue Type: Bug
>          Components: Optimizer
>            Reporter: Pouria
>            Assignee: Yingyi Bu
>
> A rewritten version of TPCH Q9 (for nested schema) breaks in re-write step:
> //The error on cc.log
> {noformat}
> 2016-03-04 10:30:04,642 WARN  [qtp85748029-40] util.log 
> (JCLLoggerAdapter.java:warn(437)) - Error for /
> java.lang.StackOverflowError
>       at 
> org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarTypeFullList(PropagatingTypeEnvironment.java:84)
>       at 
> org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarType(PropagatingTypeEnvironment.java:73)
>       at 
> org.apache.hyracks.algebricks.core.algebra.properties.TypePropagationPolicy$1.getVarType(TypePropagationPolicy.java:42)
>       at 
> org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarTypeFullList(PropagatingTypeEnvironment.java:88)
>       at 
> org.apache.hyracks.algebricks.core.algebra.typing.PropagatingTypeEnvironment.getVarType(PropagatingTypeEnvironment.java:73)
>       at 
> org.apache.hyracks.algebricks.core.algebra.properties.TypePropagationPolicy$1.getVarType(TypePropagationPolicy.java:42)
>       at org.
> [… It repeats for tens of lines]
> {noformat}
> //Here is the Query:
> {noformat}
> for $profit in (
>   for $o in dataset NestedOrders
>   for $l3 in (
>     for $p in dataset('Part')
>     for $l2 in (
>       for $ps in dataset('Partsupp')
>       for $l1 in (
>         for $s1 in (
>           for $s in dataset('Supplier')
>           for $n in dataset('Nation')
>           where $n.n_nationkey = $s.s_nationkey
>           return {
>             "s_suppkey": $s.s_suppkey,
>             "n_name": $n.n_name
>           }
>         ) //s1
>         //for $l in dataset('LineItem')
>         for $l in $o.o_lineitems
>         //where $s1.s_suppkey = $l.l_suppkey       
>         return  {
>           "l_suppkey": $l.l_suppkey,
>           "l_extendedprice": $l.l_extendedprice,
>           "l_discount": $l.l_discount,
>           "l_quantity": $l.l_quantity,
>           "l_partkey": $l.l_partkey,
>           "l_orderkey": $o.o_orderkey, //$l.l_orderkey,
>           "n_name": $s1.n_name
>         }
>       ) //l1
>       where $ps.ps_suppkey = $l1.l_suppkey and $ps.ps_partkey = $l1.l_partkey 
>       
>       return {
>         "l_extendedprice": $l1.l_extendedprice,
>         "l_discount": $l1.l_discount,
>         "l_quantity": $l1.l_quantity,
>         "l_partkey": $l1.l_partkey,
>         "l_orderkey": $l1.l_orderkey,
>         "n_name": $l1.n_name,
>         "ps_supplycost": $ps.ps_supplycost
>       }
>     ) //l2
>     where contains($p.p_name, 'green') and $p.p_partkey = $l2.l_partkey    
>     return {
>       "l_extendedprice": $l2.l_extendedprice,
>       "l_discount": $l2.l_discount,
>       "l_quantity": $l2.l_quantity,
>       "l_orderkey": $l2.l_orderkey,
>       "n_name": $l2.n_name,
>       "ps_supplycost": $l2.ps_supplycost
>     }
>   ) //l3
>   where $o.o_orderkey = $l3.l_orderkey
>   let $amount := $l3.l_extendedprice * (1 - $l3.l_discount) -  
> $l3.ps_supplycost * $l3.l_quantity 
>   let $o_year := get-year($o.o_orderdate)
>   return {
>     "nation": $l3.n_name,
>     "o_year": $o_year,
>     "amount": $amount
>   }
> ) //profit
> group by $nation := $profit.nation, $o_year := $profit.o_year with $profit
> order by $nation, $o_year desc
> return {
>   "nation": $nation, 
>   "o_year": $o_year, 
>   "sum_profit": sum( for $pr in $profit return $pr.amount )
> }
> //Here is the Schema
> create type NestedLineItemType as {
>   l_partkey: int64,
>   l_suppkey: int64,
>   l_linenumber: int32,
>   l_quantity: int32,
>   l_extendedprice: double,
>   l_discount: double,
>   l_tax: double,
>   l_returnflag: string,
>   l_linestatus: string,
>   l_shipdate: string,
>   l_commitdate: string,
>   l_receiptdate: string,
>   l_shipinstruct: string,
>   l_shipmode: string,
>   l_comment: string
> }
> create type NestedOrderType as {
>   o_orderkey: int64,
>   o_custkey: int64,
>   o_orderstatus: string,
>   o_totalprice: double,
>   o_orderdate: string,
>   o_orderpriority: string,
>   o_clerk: string,
>   o_shippriority: int32,
>   o_comment: string,
>   o_lineitems: [ NestedLineItemType ]
> }
> create type CustomerType as {
>   c_custkey: int64,
>   c_name: string,
>   c_address: string,
>   c_nationkey: int32,
>   c_phone: string,
>   c_acctbal: double,
>   c_mktsegment: string,
>   c_comment: string
> }
> create type PartType as {
>   p_partkey: int64, 
>   p_name: string,
>   p_mfgr: string,
>   p_brand: string,
>   p_type: string,
>   p_size: int32,
>   p_container: string,
>   p_retailprice: double,
>   p_comment: string
> }
> create type PartSuppType as {
>   ps_partkey: int64,
>   ps_suppkey: int64,
>   ps_availqty: int32,
>   ps_supplycost: double,
>   ps_comment: string
> }
> create type SupplierType as {
>   s_suppkey: int64,
>   s_name: string,
>   s_address: string,
>   s_nationkey: int32,
>   s_phone: string,
>   s_acctbal: double,
>   s_comment: string
> }
> create type NationType as {
>   n_nationkey: int32,
>   n_name: string,
>   n_regionkey: int32,
>   n_comment: string
> }
> create type RegionType as {
>   r_regionkey: int32,
>   r_name: string,
>   r_comment: string
> }
> create dataset NestedOrders(NestedOrderType)  primary key o_orderkey;
> create dataset Customer(CustomerType) primary key c_custkey;
> create dataset Part(PartType)         primary key p_partkey;
> create dataset Partsupp(PartSuppType) primary key ps_partkey, ps_suppkey;
> create dataset Supplier(SupplierType) primary key s_suppkey;
> create dataset Region(RegionType)     primary key r_regionkey;
> create dataset Nation(NationType)     primary key n_nationkey;
> create index nation_fk_region on Nation(n_regionkey);
> create index supplier_fk_nation on Supplier (s_nationkey);
> create index partsupp_fk_part on Partsupp (ps_partkey);
> create index partsupp_fk_supplier on Partsupp (ps_suppkey);
> create index customer_fk_nation on Customer (c_nationkey);
> create index nestedOrders_fk_customer on NestedOrders (o_custkey);
> create index nestedOrders_orderdateIx on NestedOrders (o_orderdate);
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to