[ 
https://issues.apache.org/jira/browse/SPARK-14616?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

JESSE CHEN updated SPARK-14616:
-------------------------------
    Description: 
{code:title=tpcds q44}
 select  asceding.rnk, i1.i_product_name best_performing, i2.i_product_name 
worst_performing
from(select *
     from (select item_sk,rank() over (order by rank_col asc) rnk
           from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
                 from store_sales ss1
                 where ss_store_sk = 4
                 group by ss_item_sk
                 having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) 
rank_col
                                                  from store_sales
                                                  where ss_store_sk = 4
                                                    and ss_addr_sk is null
                                                  group by ss_store_sk))V1)V11
     where rnk  < 11) asceding,
    (select *
     from (select item_sk,rank() over (order by rank_col desc) rnk
           from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
                 from store_sales ss1
                 where ss_store_sk = 4
                 group by ss_item_sk
                 having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) 
rank_col
                                                  from store_sales
                                                  where ss_store_sk = 4
                                                    and ss_addr_sk is null
                                                  group by ss_store_sk))V2)V21
     where rnk  < 11) descending,
item i1,
item i2
where asceding.rnk = descending.rnk
  and i1.i_item_sk=asceding.item_sk
  and i2.i_item_sk=descending.item_sk
order by asceding.rnk
 limit 100;

{code}

{noformat}
bin/spark-sql  --driver-memory 10g --verbose --master yarn-client  --packages 
com.databricks:spark-csv_2.10:1.3.0 --executor-memory 4g --num-executors 80 
--executor-cores 2 --database hadoopds1g  -f q44.sql
{noformat}

{noformat}
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange SinglePartition, None
+- WholeStageCodegen
   :  +- Project [item_sk#0,rank_col#1]
   :     +- Filter havingCondition#219: boolean
   :        +- TungstenAggregate(key=[ss_item_sk#12], 
functions=[(avg(ss_net_profit#32),mode=Final,isDistinct=false)], 
output=[havingCondition#219,item_sk#0,rank_col#1])
   :           +- INPUT
   +- Exchange hashpartitioning(ss_item_sk#12,200), None
      +- WholeStageCodegen
         :  +- TungstenAggregate(key=[ss_item_sk#12], 
functions=[(avg(ss_net_profit#32),mode=Partial,isDistinct=false)], 
output=[ss_item_sk#12,sum#612,count#613L])
         :     +- Project [ss_item_sk#12,ss_net_profit#32]
         :        +- Filter (ss_store_sk#17 = 4)
         :           +- INPUT
         +- Scan ParquetRelation: 
hadoopds1g.store_sales[ss_item_sk#12,ss_net_profit#32,ss_store_sk#17] 
InputPaths: 
hdfs://bigaperf116.svl.ibm.com:8020/apps/hive/warehouse/hadoopds1g.db/store_sales,
 PushedFilters: [EqualTo(ss_store_sk,4)]

        at 
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
        at org.apache.spark.sql.execution.Exchange.doExecute(Exchange.scala:105)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
        at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
        at org.apache.spark.sql.execution.Window.doExecute(Window.scala:288)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
        at 
org.apache.spark.sql.execution.InputAdapter.upstream(WholeStageCodegen.scala:176)
        at 
org.apache.spark.sql.execution.Filter.upstream(basicOperators.scala:73)
        at 
org.apache.spark.sql.execution.Project.upstream(basicOperators.scala:35)
        at 
org.apache.spark.sql.execution.WholeStageCodegen.doExecute(WholeStageCodegen.scala:279)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
        at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
        at 
org.apache.spark.sql.execution.joins.SortMergeJoin.doExecute(SortMergeJoin.scala:63)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)

{noformat}

Note this happens only when using Parquet tables. Flat files (CSV) work fine 
and return correct query results.

Additionally, query 58 encounters the same exception. That query is as 
follows:
{noformat}
 with ss_items as
 (select i_item_id item_id
        ,sum(ss_ext_sales_price) ss_item_rev 
 from store_sales
     ,item
     ,date_dim
 where ss_item_sk = i_item_sk
   and d_date in (select d_date
                  from date_dim
                  where d_week_seq = (select d_week_seq 
                                      from date_dim
                                      where d_date = '2000-01-03'))
   and ss_sold_date_sk   = d_date_sk
 group by i_item_id),
 cs_items as
 (select i_item_id item_id
        ,sum(cs_ext_sales_price) cs_item_rev
  from catalog_sales
      ,item
      ,date_dim
 where cs_item_sk = i_item_sk
  and  d_date in (select d_date
                  from date_dim
                  where d_week_seq = (select d_week_seq 
                                      from date_dim
                                      where d_date = '2000-01-03'))
  and  cs_sold_date_sk = d_date_sk
 group by i_item_id),
 ws_items as
 (select i_item_id item_id
        ,sum(ws_ext_sales_price) ws_item_rev
  from web_sales
      ,item
      ,date_dim
 where ws_item_sk = i_item_sk
  and  d_date in (select d_date
                  from date_dim
                  where d_week_seq =(select d_week_seq 
                                     from date_dim
                                     where d_date = '2000-01-03'))
  and ws_sold_date_sk   = d_date_sk
 group by i_item_id)
  select  ss_items.item_id
       ,ss_item_rev
       ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev
       ,cs_item_rev
       ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev
       ,ws_item_rev
       ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev
       ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average
 from ss_items,cs_items,ws_items
 where ss_items.item_id=cs_items.item_id
   and ss_items.item_id=ws_items.item_id 
   and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
   and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
   and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
   and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
   and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
   and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
 order by item_id
         ,ss_item_rev
  limit 100;

{noformat}

Not certain if it is related, but this seems to be a case where a bad plan was created.
Here are the logical, analyzed, optimized, and physical plans created for query 44:
{noformat}
Logical Plan =='Limit 100
+- 'Sort ['asceding.rnk ASC], true
   +- 'Project 
[unresolvedalias('asceding.rnk,None),unresolvedalias('i1.i_product_name AS 
best_performing#433,None),unresolvedalias('i2.i_product_name AS 
worst_performing#434,None)]
      +- 'Filter ((('asceding.rnk = 'descending.rnk) && ('i1.i_item_sk = 
'asceding.item_sk)) && ('i2.i_item_sk = 'descending.item_sk))
         +- 'Join Inner, None
            :- 'Join Inner, None
            :  :- 'Join Inner, None
            :  :  :- 'Subquery asceding
            :  :  :  +- 'Project [unresolvedalias(*,None)]
            :  :  :     +- 'Filter ('rnk < 11)
            :  :  :        +- 'Subquery V11
            :  :  :           +- 'Project 
[unresolvedalias('item_sk,None),unresolvedalias('rank() 
windowspecdefinition('rank_col ASC,UnspecifiedFrame) AS rnk#428,None)]
            :  :  :              +- 'Subquery V1
            :  :  :                 +- 'Filter cast(('avg('ss_net_profit) > 
(0.9 * scalarsubquery('Aggregate ['ss_store_sk], 
[unresolvedalias('avg('ss_net_profit) AS rank_col#427,None)]
+- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk))
   +- 'UnresolvedRelation `store_sales`, None
))) as boolean)
            :  :  :                    +- 'Aggregate ['ss_item_sk], 
[unresolvedalias('ss_item_sk AS 
item_sk#425,None),unresolvedalias('avg('ss_net_profit) AS rank_col#426,None)]
            :  :  :                       +- 'Filter ('ss_store_sk = 4)
            :  :  :                          +- 'UnresolvedRelation 
`store_sales`, Some(ss1)
            :  :  +- 'Subquery descending
            :  :     +- 'Project [unresolvedalias(*,None)]
            :  :        +- 'Filter ('rnk < 11)
            :  :           +- 'Subquery V21
            :  :              +- 'Project 
[unresolvedalias('item_sk,None),unresolvedalias('rank() 
windowspecdefinition('rank_col DESC,UnspecifiedFrame) AS rnk#432,None)]
            :  :                 +- 'Subquery V2
            :  :                    +- 'Filter cast(('avg('ss_net_profit) > 
(0.9 * scalarsubquery('Aggregate ['ss_store_sk], 
[unresolvedalias('avg('ss_net_profit) AS rank_col#431,None)]
+- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk))
   +- 'UnresolvedRelation `store_sales`, None
))) as boolean)
            :  :                       +- 'Aggregate ['ss_item_sk], 
[unresolvedalias('ss_item_sk AS 
item_sk#429,None),unresolvedalias('avg('ss_net_profit) AS rank_col#430,None)]
            :  :                          +- 'Filter ('ss_store_sk = 4)
            :  :                             +- 'UnresolvedRelation 
`store_sales`, Some(ss1)
            :  +- 'UnresolvedRelation `item`, Some(i1)
            +- 'UnresolvedRelation `item`, Some(i2)
==
Analyzed Plan ==Limit 100
+- Sort [rnk#428 ASC], true
   +- Project [rnk#428,i_product_name#202 AS 
best_performing#433,i_product_name#478 AS worst_performing#434]
      +- Filter (((rnk#428 = rnk#432) && (i_item_sk#181 = item_sk#425)) && 
(i_item_sk#457 = item_sk#429))
         +- Join Inner, None
            :- Join Inner, None
            :  :- Join Inner, None
            :  :  :- Subquery asceding
            :  :  :  +- Project [item_sk#425,rnk#428]
            :  :  :     +- Filter (rnk#428 < 11)
            :  :  :        +- Subquery V11
            :  :  :           +- Project [item_sk#425,rnk#428]
            :  :  :              +- Project 
[item_sk#425,rank_col#426,rnk#428,rnk#428]
            :  :  :                 +- Window [item_sk#425,rank_col#426], 
[rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
            :  :  :                    +- Project [item_sk#425,rank_col#426]
            :  :  :                       +- Subquery V1
            :  :  :                          +- Project 
[item_sk#425,rank_col#426]
            :  :  :                             +- Filter havingCondition#439: 
boolean
            :  :  :                                +- Aggregate 
[ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * 
scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) AS rank_col#427]
+- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
   +- Subquery store_sales
      +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
))) as boolean) AS havingCondition#439,ss_item_sk#282 AS 
item_sk#425,(avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) AS rank_col#426]
            :  :  :                                   +- Filter 
(ss_store_sk#287 = 4)
            :  :  :                                      +- Subquery ss1
            :  :  :                                         +- Subquery 
store_sales
            :  :  :                                            +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
            :  :  +- Subquery descending
            :  :     +- Project [item_sk#429,rnk#432]
            :  :        +- Filter (rnk#432 < 11)
            :  :           +- Subquery V21
            :  :              +- Project [item_sk#429,rnk#432]
            :  :                 +- Project 
[item_sk#429,rank_col#430,rnk#432,rnk#432]
            :  :                    +- Window [item_sk#429,rank_col#430], 
[rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
            :  :                       +- Project [item_sk#429,rank_col#430]
            :  :                          +- Subquery V2
            :  :                             +- Project 
[item_sk#429,rank_col#430]
            :  :                                +- Filter havingCondition#440: 
boolean
            :  :                                   +- Aggregate 
[ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * 
scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) AS rank_col#431]
+- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
   +- Subquery store_sales
      +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
))) as boolean) AS havingCondition#440,ss_item_sk#282 AS 
item_sk#429,(avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) AS rank_col#430]
            :  :                                      +- Filter 
(ss_store_sk#287 = 4)
            :  :                                         +- Subquery ss1
            :  :                                            +- Subquery 
store_sales
            :  :                                               +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
            :  +- Subquery i1
            :     +- Subquery item
            :        +- 
Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202]
 ParquetRelation
            +- Subquery i2
               +- Subquery item
                  +- 
Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478]
 ParquetRelation
==
Optimized Plan ==Limit 100
+- Sort [rnk#428 ASC], true
   +- Project [rnk#428,i_product_name#202 AS 
best_performing#433,i_product_name#478 AS worst_performing#434]
      +- Join Inner, Some((i_item_sk#457 = item_sk#429))
         :- Project [rnk#428,item_sk#429,i_product_name#202]
         :  +- Join Inner, Some((i_item_sk#181 = item_sk#425))
         :     :- Project [rnk#428,item_sk#429,item_sk#425]
         :     :  +- Join Inner, Some((rnk#428 = rnk#432))
         :     :     :- Project [item_sk#425,rnk#428]
         :     :     :  +- Filter (rnk#428 < 11)
         :     :     :     +- Window [item_sk#425,rank_col#426], 
[rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
         :     :     :        +- Project [item_sk#425,rank_col#426]
         :     :     :           +- Filter havingCondition#439: boolean
         :     :     :              +- Aggregate [ss_item_sk#282], 
[((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > 
(0.9 * scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 
as double)),mode=Complete,isDistinct=false) AS rank_col#427]
+- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
   +- Subquery store_sales
      +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
))) AS havingCondition#439,ss_item_sk#282 AS 
item_sk#425,(avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) AS rank_col#426]
         :     :     :                 +- Project 
[ss_item_sk#282,ss_net_profit#302]
         :     :     :                    +- Filter (ss_store_sk#287 = 4)
         :     :     :                       +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
         :     :     +- Project [item_sk#429,rnk#432]
         :     :        +- Filter (rnk#432 < 11)
         :     :           +- Window [item_sk#429,rank_col#430], 
[rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
         :     :              +- Project [item_sk#429,rank_col#430]
         :     :                 +- Filter havingCondition#440: boolean
         :     :                    +- Aggregate [ss_item_sk#282], 
[((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > 
(0.9 * scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 
as double)),mode=Complete,isDistinct=false) AS rank_col#431]
+- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
   +- Subquery store_sales
      +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
))) AS havingCondition#440,ss_item_sk#282 AS 
item_sk#429,(avg(cast(ss_net_profit#302 as 
double)),mode=Complete,isDistinct=false) AS rank_col#430]
         :     :                       +- Project 
[ss_item_sk#282,ss_net_profit#302]
         :     :                          +- Filter (ss_store_sk#287 = 4)
         :     :                             +- 
Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
 ParquetRelation
         :     +- Project [i_item_sk#181,i_product_name#202]
         :        +- 
Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202]
 ParquetRelation
         +- Project [i_item_sk#457,i_product_name#478]
            +- 
Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478]
 ParquetRelation
==
Physical Plan ==TakeOrderedAndProject(limit=100, orderBy=[rnk#428 ASC], 
output=[rnk#428,best_performing#433,worst_performing#434])
+- WholeStageCodegen
   :  +- Project [rnk#428,i_product_name#202 AS 
best_performing#433,i_product_name#478 AS worst_performing#434]
   :     +- BroadcastHashJoin [item_sk#429], [i_item_sk#457], BuildRight, None
   :        :- Project [rnk#428,item_sk#429,i_product_name#202]
   :        :  +- BroadcastHashJoin [item_sk#425], [i_item_sk#181], BuildRight, 
None
   :        :     :- Project [rnk#428,item_sk#429,item_sk#425]
   :        :     :  +- INPUT
   :        :     +- INPUT
   :        +- INPUT
   :- SortMergeJoin [rnk#428], [rnk#432], None
   :  :- Sort [rnk#428 ASC], false, 0
   :  :  +- WholeStageCodegen
   :  :     :  +- Project [item_sk#425,rnk#428]
   :  :     :     +- Filter (rnk#428 < 11)
   :  :     :        +- INPUT
   :  :     +- Window [item_sk#425,rank_col#426], [rank(rank_col#426) 
windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
   :  :        +- Sort [rank_col#426 ASC], false, 0
   :  :           +- Exchange SinglePartition, None
   :  :              +- WholeStageCodegen
   :  :                 :  +- Project [item_sk#425,rank_col#426]
   :  :                 :     +- Filter havingCondition#439: boolean
   :  :                 :        +- TungstenAggregate(key=[ss_item_sk#282], 
functions=[(avg(cast(ss_net_profit#302 as 
double)),mode=Final,isDistinct=false)], 
output=[havingCondition#439,item_sk#425,rank_col#426])
   :  :                 :           +- INPUT
   :  :                 +- Exchange hashpartitioning(ss_item_sk#282,200), None
   :  :                    +- WholeStageCodegen
   :  :                       :  +- TungstenAggregate(key=[ss_item_sk#282], 
functions=[(avg(cast(ss_net_profit#302 as 
double)),mode=Partial,isDistinct=false)], 
output=[ss_item_sk#282,sum#506,count#507L])
   :  :                       :     +- Project 
[ss_item_sk#282,ss_net_profit#302]
   :  :                       :        +- Filter (ss_store_sk#287 = 4)
   :  :                       :           +- INPUT
   :  :                       +- Scan 
ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
 PushedFilters: [EqualTo(ss_store_sk,4)]
   :  +- Sort [rnk#432 ASC], false, 0
   :     +- WholeStageCodegen
   :        :  +- Project [item_sk#429,rnk#432]
   :        :     +- Filter (rnk#432 < 11)
   :        :        +- INPUT
   :        +- Window [item_sk#429,rank_col#430], [rank(rank_col#430) 
windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
   :           +- Sort [rank_col#430 DESC], false, 0
   :              +- Exchange SinglePartition, None
   :                 +- WholeStageCodegen
   :                    :  +- Project [item_sk#429,rank_col#430]
   :                    :     +- Filter havingCondition#440: boolean
   :                    :        +- TungstenAggregate(key=[ss_item_sk#282], 
functions=[(avg(cast(ss_net_profit#302 as 
double)),mode=Final,isDistinct=false)], 
output=[havingCondition#440,item_sk#429,rank_col#430])
   :                    :           +- INPUT
   :                    +- Exchange hashpartitioning(ss_item_sk#282,200), None
   :                       +- WholeStageCodegen
   :                          :  +- TungstenAggregate(key=[ss_item_sk#282], 
functions=[(avg(cast(ss_net_profit#302 as 
double)),mode=Partial,isDistinct=false)], 
output=[ss_item_sk#282,sum#511,count#512L])
   :                          :     +- Project 
[ss_item_sk#282,ss_net_profit#302]
   :                          :        +- Filter (ss_store_sk#287 = 4)
   :                          :           +- INPUT
   :                          +- Scan 
ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
 PushedFilters: [EqualTo(ss_store_sk,4)]
   :- Scan ParquetRelation[i_item_sk#181,i_product_name#202] InputPaths: 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet
   +- Scan ParquetRelation[i_item_sk#457,i_product_name#478] InputPaths: 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet,
 
hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet

{noformat}

  was:
{code:title=/tmp/bug.py}
from pyspark import SparkContext
from pyspark.sql import SQLContext, Row

sc = SparkContext()
sqlc = SQLContext(sc)

R = Row('id', 'foo')
r = sqlc.createDataFrame(sc.parallelize([R('abc', 'foo')]))
q = sqlc.createDataFrame(sc.parallelize([R('aaaaaaaaaaaaaaaaaaaaaaaa', 'bar')]))
q.write.parquet('/tmp/1.parq')
q = sqlc.read.parquet('/tmp/1.parq')
j = r.join(q, r.id == q.id)
print j.count()
{code}

{noformat}
[user@sandbox test]$ spark-submit --executor-memory=32g /tmp/bug.py
[user@sandbox test]$ hadoop fs -rmr /tmp/1.parq
{noformat}

{noformat}
15/11/04 04:28:38 INFO codegen.GenerateUnsafeProjection: Code generated in 
119.90324 ms
Traceback (most recent call last):
  File "/tmp/bug.py", line 13, in <module>
    print j.count()
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py", line 
268, in count
  File "/usr/lib/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", 
line 538, in __call__
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 36, 
in deco
  File "/usr/lib/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 
300, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o148.count.
: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
TungstenAggregate(key=[], functions=[(count(1),mode=Final,isDistinct=false)], 
output=[count#10L])
 TungstenExchange SinglePartition
  TungstenAggregate(key=[], 
functions=[(count(1),mode=Partial,isDistinct=false)], output=[currentCount#13L])
   TungstenProject
    BroadcastHashJoin [id#0], [id#8], BuildRight
     TungstenProject [id#0]
      Scan PhysicalRDD[id#0,foo#1]
     ConvertToUnsafe
      Scan ParquetRelation[hdfs:///tmp/1.parq][id#8]

        at 
org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:49)
        at 
org.apache.spark.sql.execution.aggregate.TungstenAggregate.doExecute(TungstenAggregate.scala:69)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:140)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:138)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138)
        at 
org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:174)
        at 
org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385)
        at 
org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56)
        at 
org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:1903)
        at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:1384)
        at org.apache.spark.sql.DataFrame.count(DataFrame.scala:1402)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:497)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
        at py4j.Gateway.invoke(Gateway.java:259)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:207)
        at java.lang.Thread.run(Thread.java:745)
{noformat}

Note this happens only under the following conditions:
# executor memory >= 32GB (doesn't fail with up to 31 GB)
# the ID in the q dataframe has exactly 24 chars (doesn't fail with fewer or 
more than 24 chars)
# q is read from parquet


> TreeNodeException running Q44 and 58 on Parquet tables
> ------------------------------------------------------
>
>                 Key: SPARK-14616
>                 URL: https://issues.apache.org/jira/browse/SPARK-14616
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.0.0
>            Reporter: JESSE CHEN
>
> {code:title=tpcds q44}
>  select  asceding.rnk, i1.i_product_name best_performing, i2.i_product_name 
> worst_performing
> from(select *
>      from (select item_sk,rank() over (order by rank_col asc) rnk
>            from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
>                  from store_sales ss1
>                  where ss_store_sk = 4
>                  group by ss_item_sk
>                  having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) 
> rank_col
>                                                   from store_sales
>                                                   where ss_store_sk = 4
>                                                     and ss_addr_sk is null
>                                                   group by ss_store_sk))V1)V11
>      where rnk  < 11) asceding,
>     (select *
>      from (select item_sk,rank() over (order by rank_col desc) rnk
>            from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col
>                  from store_sales ss1
>                  where ss_store_sk = 4
>                  group by ss_item_sk
>                  having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) 
> rank_col
>                                                   from store_sales
>                                                   where ss_store_sk = 4
>                                                     and ss_addr_sk is null
>                                                   group by ss_store_sk))V2)V21
>      where rnk  < 11) descending,
> item i1,
> item i2
> where asceding.rnk = descending.rnk
>   and i1.i_item_sk=asceding.item_sk
>   and i2.i_item_sk=descending.item_sk
> order by asceding.rnk
>  limit 100;
> {code}
> {noformat}
> bin/spark-sql  --driver-memory 10g --verbose --master yarn-client  --packages 
> com.databricks:spark-csv_2.10:1.3.0 --executor-memory 4g --num-executors 80 
> --executor-cores 2 --database hadoopds1g  -f q44.sql
> {noformat}
> {noformat}
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Exchange SinglePartition, None
> +- WholeStageCodegen
>    :  +- Project [item_sk#0,rank_col#1]
>    :     +- Filter havingCondition#219: boolean
>    :        +- TungstenAggregate(key=[ss_item_sk#12], 
> functions=[(avg(ss_net_profit#32),mode=Final,isDistinct=false)], 
> output=[havingCondition#219,item_sk#0,rank_col#1])
>    :           +- INPUT
>    +- Exchange hashpartitioning(ss_item_sk#12,200), None
>       +- WholeStageCodegen
>          :  +- TungstenAggregate(key=[ss_item_sk#12], 
> functions=[(avg(ss_net_profit#32),mode=Partial,isDistinct=false)], 
> output=[ss_item_sk#12,sum#612,count#613L])
>          :     +- Project [ss_item_sk#12,ss_net_profit#32]
>          :        +- Filter (ss_store_sk#17 = 4)
>          :           +- INPUT
>          +- Scan ParquetRelation: 
> hadoopds1g.store_sales[ss_item_sk#12,ss_net_profit#32,ss_store_sk#17] 
> InputPaths: 
> hdfs://bigaperf116.svl.ibm.com:8020/apps/hive/warehouse/hadoopds1g.db/store_sales,
>  PushedFilters: [EqualTo(ss_store_sk,4)]
>         at 
> org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
>         at 
> org.apache.spark.sql.execution.Exchange.doExecute(Exchange.scala:105)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at 
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at 
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at 
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at 
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.Window.doExecute(Window.scala:288)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at 
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at 
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at 
> org.apache.spark.sql.execution.InputAdapter.upstream(WholeStageCodegen.scala:176)
>         at 
> org.apache.spark.sql.execution.Filter.upstream(basicOperators.scala:73)
>         at 
> org.apache.spark.sql.execution.Project.upstream(basicOperators.scala:35)
>         at 
> org.apache.spark.sql.execution.WholeStageCodegen.doExecute(WholeStageCodegen.scala:279)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at 
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at 
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116)
>         at 
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>         at 
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
>         at 
> org.apache.spark.sql.execution.joins.SortMergeJoin.doExecute(SortMergeJoin.scala:63)
>         at 
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> {noformat}
> Note this happens only when using parquet tables. Flat file (csv) works fine 
> and returns correct query results.
> Additionally, query 58 also encounters the same exception. The query is as 
> follows:
> {noformat}
>  with ss_items as
>  (select i_item_id item_id
>         ,sum(ss_ext_sales_price) ss_item_rev 
>  from store_sales
>      ,item
>      ,date_dim
>  where ss_item_sk = i_item_sk
>    and d_date in (select d_date
>                   from date_dim
>                   where d_week_seq = (select d_week_seq 
>                                       from date_dim
>                                       where d_date = '2000-01-03'))
>    and ss_sold_date_sk   = d_date_sk
>  group by i_item_id),
>  cs_items as
>  (select i_item_id item_id
>         ,sum(cs_ext_sales_price) cs_item_rev
>   from catalog_sales
>       ,item
>       ,date_dim
>  where cs_item_sk = i_item_sk
>   and  d_date in (select d_date
>                   from date_dim
>                   where d_week_seq = (select d_week_seq 
>                                       from date_dim
>                                       where d_date = '2000-01-03'))
>   and  cs_sold_date_sk = d_date_sk
>  group by i_item_id),
>  ws_items as
>  (select i_item_id item_id
>         ,sum(ws_ext_sales_price) ws_item_rev
>   from web_sales
>       ,item
>       ,date_dim
>  where ws_item_sk = i_item_sk
>   and  d_date in (select d_date
>                   from date_dim
>                   where d_week_seq =(select d_week_seq 
>                                      from date_dim
>                                      where d_date = '2000-01-03'))
>   and ws_sold_date_sk   = d_date_sk
>  group by i_item_id)
>   select  ss_items.item_id
>        ,ss_item_rev
>        ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev
>        ,cs_item_rev
>        ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev
>        ,ws_item_rev
>        ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev
>        ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average
>  from ss_items,cs_items,ws_items
>  where ss_items.item_id=cs_items.item_id
>    and ss_items.item_id=ws_items.item_id 
>    and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
>    and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
>    and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
>    and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev
>    and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev
>    and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev
>  order by item_id
>          ,ss_item_rev
>   limit 100;
> {noformat}
> Not certain if this is related, but it seems to be a case where a bad plan was created.
> Here are the logical, optimized, and physical plans created for query 44:
> {noformat}
> Logical Plan =='Limit 100
> +- 'Sort ['asceding.rnk ASC], true
>    +- 'Project 
> [unresolvedalias('asceding.rnk,None),unresolvedalias('i1.i_product_name AS 
> best_performing#433,None),unresolvedalias('i2.i_product_name AS 
> worst_performing#434,None)]
>       +- 'Filter ((('asceding.rnk = 'descending.rnk) && ('i1.i_item_sk = 
> 'asceding.item_sk)) && ('i2.i_item_sk = 'descending.item_sk))
>          +- 'Join Inner, None
>             :- 'Join Inner, None
>             :  :- 'Join Inner, None
>             :  :  :- 'Subquery asceding
>             :  :  :  +- 'Project [unresolvedalias(*,None)]
>             :  :  :     +- 'Filter ('rnk < 11)
>             :  :  :        +- 'Subquery V11
>             :  :  :           +- 'Project 
> [unresolvedalias('item_sk,None),unresolvedalias('rank() 
> windowspecdefinition('rank_col ASC,UnspecifiedFrame) AS rnk#428,None)]
>             :  :  :              +- 'Subquery V1
>             :  :  :                 +- 'Filter cast(('avg('ss_net_profit) > 
> (0.9 * scalarsubquery('Aggregate ['ss_store_sk], 
> [unresolvedalias('avg('ss_net_profit) AS rank_col#427,None)]
> +- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk))
>    +- 'UnresolvedRelation `store_sales`, None
> ))) as boolean)
>             :  :  :                    +- 'Aggregate ['ss_item_sk], 
> [unresolvedalias('ss_item_sk AS 
> item_sk#425,None),unresolvedalias('avg('ss_net_profit) AS rank_col#426,None)]
>             :  :  :                       +- 'Filter ('ss_store_sk = 4)
>             :  :  :                          +- 'UnresolvedRelation 
> `store_sales`, Some(ss1)
>             :  :  +- 'Subquery descending
>             :  :     +- 'Project [unresolvedalias(*,None)]
>             :  :        +- 'Filter ('rnk < 11)
>             :  :           +- 'Subquery V21
>             :  :              +- 'Project 
> [unresolvedalias('item_sk,None),unresolvedalias('rank() 
> windowspecdefinition('rank_col DESC,UnspecifiedFrame) AS rnk#432,None)]
>             :  :                 +- 'Subquery V2
>             :  :                    +- 'Filter cast(('avg('ss_net_profit) > 
> (0.9 * scalarsubquery('Aggregate ['ss_store_sk], 
> [unresolvedalias('avg('ss_net_profit) AS rank_col#431,None)]
> +- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk))
>    +- 'UnresolvedRelation `store_sales`, None
> ))) as boolean)
>             :  :                       +- 'Aggregate ['ss_item_sk], 
> [unresolvedalias('ss_item_sk AS 
> item_sk#429,None),unresolvedalias('avg('ss_net_profit) AS rank_col#430,None)]
>             :  :                          +- 'Filter ('ss_store_sk = 4)
>             :  :                             +- 'UnresolvedRelation 
> `store_sales`, Some(ss1)
>             :  +- 'UnresolvedRelation `item`, Some(i1)
>             +- 'UnresolvedRelation `item`, Some(i2)
> ==
> Analyzed Plan ==Limit 100
> +- Sort [rnk#428 ASC], true
>    +- Project [rnk#428,i_product_name#202 AS 
> best_performing#433,i_product_name#478 AS worst_performing#434]
>       +- Filter (((rnk#428 = rnk#432) && (i_item_sk#181 = item_sk#425)) && 
> (i_item_sk#457 = item_sk#429))
>          +- Join Inner, None
>             :- Join Inner, None
>             :  :- Join Inner, None
>             :  :  :- Subquery asceding
>             :  :  :  +- Project [item_sk#425,rnk#428]
>             :  :  :     +- Filter (rnk#428 < 11)
>             :  :  :        +- Subquery V11
>             :  :  :           +- Project [item_sk#425,rnk#428]
>             :  :  :              +- Project 
> [item_sk#425,rank_col#426,rnk#428,rnk#428]
>             :  :  :                 +- Window [item_sk#425,rank_col#426], 
> [rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN 
> UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
>             :  :  :                    +- Project [item_sk#425,rank_col#426]
>             :  :  :                       +- Subquery V1
>             :  :  :                          +- Project 
> [item_sk#425,rank_col#426]
>             :  :  :                             +- Filter 
> havingCondition#439: boolean
>             :  :  :                                +- Aggregate 
> [ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * 
> scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) AS rank_col#427]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
> ))) as boolean) AS havingCondition#439,ss_item_sk#282 AS 
> item_sk#425,(avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) AS rank_col#426]
>             :  :  :                                   +- Filter 
> (ss_store_sk#287 = 4)
>             :  :  :                                      +- Subquery ss1
>             :  :  :                                         +- Subquery 
> store_sales
>             :  :  :                                            +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
>             :  :  +- Subquery descending
>             :  :     +- Project [item_sk#429,rnk#432]
>             :  :        +- Filter (rnk#432 < 11)
>             :  :           +- Subquery V21
>             :  :              +- Project [item_sk#429,rnk#432]
>             :  :                 +- Project 
> [item_sk#429,rank_col#430,rnk#432,rnk#432]
>             :  :                    +- Window [item_sk#429,rank_col#430], 
> [rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN 
> UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
>             :  :                       +- Project [item_sk#429,rank_col#430]
>             :  :                          +- Subquery V2
>             :  :                             +- Project 
> [item_sk#429,rank_col#430]
>             :  :                                +- Filter 
> havingCondition#440: boolean
>             :  :                                   +- Aggregate 
> [ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * 
> scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) AS rank_col#431]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
> ))) as boolean) AS havingCondition#440,ss_item_sk#282 AS 
> item_sk#429,(avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) AS rank_col#430]
>             :  :                                      +- Filter 
> (ss_store_sk#287 = 4)
>             :  :                                         +- Subquery ss1
>             :  :                                            +- Subquery 
> store_sales
>             :  :                                               +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
>             :  +- Subquery i1
>             :     +- Subquery item
>             :        +- 
> Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202]
>  ParquetRelation
>             +- Subquery i2
>                +- Subquery item
>                   +- 
> Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478]
>  ParquetRelation
> ==
> Optimized Plan ==Limit 100
> +- Sort [rnk#428 ASC], true
>    +- Project [rnk#428,i_product_name#202 AS 
> best_performing#433,i_product_name#478 AS worst_performing#434]
>       +- Join Inner, Some((i_item_sk#457 = item_sk#429))
>          :- Project [rnk#428,item_sk#429,i_product_name#202]
>          :  +- Join Inner, Some((i_item_sk#181 = item_sk#425))
>          :     :- Project [rnk#428,item_sk#429,item_sk#425]
>          :     :  +- Join Inner, Some((rnk#428 = rnk#432))
>          :     :     :- Project [item_sk#425,rnk#428]
>          :     :     :  +- Filter (rnk#428 < 11)
>          :     :     :     +- Window [item_sk#425,rank_col#426], 
> [rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN 
> UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
>          :     :     :        +- Project [item_sk#425,rank_col#426]
>          :     :     :           +- Filter havingCondition#439: boolean
>          :     :     :              +- Aggregate [ss_item_sk#282], 
> [((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > 
> (0.9 * scalarsubquery(Aggregate [ss_store_sk#287], 
> [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS 
> rank_col#427]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
> ))) AS havingCondition#439,ss_item_sk#282 AS 
> item_sk#425,(avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) AS rank_col#426]
>          :     :     :                 +- Project 
> [ss_item_sk#282,ss_net_profit#302]
>          :     :     :                    +- Filter (ss_store_sk#287 = 4)
>          :     :     :                       +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
>          :     :     +- Project [item_sk#429,rnk#432]
>          :     :        +- Filter (rnk#432 < 11)
>          :     :           +- Window [item_sk#429,rank_col#430], 
> [rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN 
> UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
>          :     :              +- Project [item_sk#429,rank_col#430]
>          :     :                 +- Filter havingCondition#440: boolean
>          :     :                    +- Aggregate [ss_item_sk#282], 
> [((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > 
> (0.9 * scalarsubquery(Aggregate [ss_store_sk#287], 
> [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS 
> rank_col#431]
> +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286))
>    +- Subquery store_sales
>       +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
> ))) AS havingCondition#440,ss_item_sk#282 AS 
> item_sk#429,(avg(cast(ss_net_profit#302 as 
> double)),mode=Complete,isDistinct=false) AS rank_col#430]
>          :     :                       +- Project 
> [ss_item_sk#282,ss_net_profit#302]
>          :     :                          +- Filter (ss_store_sk#287 = 4)
>          :     :                             +- 
> Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302]
>  ParquetRelation
>          :     +- Project [i_item_sk#181,i_product_name#202]
>          :        +- 
> Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202]
>  ParquetRelation
>          +- Project [i_item_sk#457,i_product_name#478]
>             +- 
> Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478]
>  ParquetRelation
> ==
> Physical Plan ==TakeOrderedAndProject(limit=100, orderBy=[rnk#428 ASC], 
> output=[rnk#428,best_performing#433,worst_performing#434])
> +- WholeStageCodegen
>    :  +- Project [rnk#428,i_product_name#202 AS 
> best_performing#433,i_product_name#478 AS worst_performing#434]
>    :     +- BroadcastHashJoin [item_sk#429], [i_item_sk#457], BuildRight, None
>    :        :- Project [rnk#428,item_sk#429,i_product_name#202]
>    :        :  +- BroadcastHashJoin [item_sk#425], [i_item_sk#181], 
> BuildRight, None
>    :        :     :- Project [rnk#428,item_sk#429,item_sk#425]
>    :        :     :  +- INPUT
>    :        :     +- INPUT
>    :        +- INPUT
>    :- SortMergeJoin [rnk#428], [rnk#432], None
>    :  :- Sort [rnk#428 ASC], false, 0
>    :  :  +- WholeStageCodegen
>    :  :     :  +- Project [item_sk#425,rnk#428]
>    :  :     :     +- Filter (rnk#428 < 11)
>    :  :     :        +- INPUT
>    :  :     +- Window [item_sk#425,rank_col#426], [rank(rank_col#426) 
> windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN UNBOUNDED PRECEDING AND 
> CURRENT ROW) AS rnk#428], [rank_col#426 ASC]
>    :  :        +- Sort [rank_col#426 ASC], false, 0
>    :  :           +- Exchange SinglePartition, None
>    :  :              +- WholeStageCodegen
>    :  :                 :  +- Project [item_sk#425,rank_col#426]
>    :  :                 :     +- Filter havingCondition#439: boolean
>    :  :                 :        +- TungstenAggregate(key=[ss_item_sk#282], 
> functions=[(avg(cast(ss_net_profit#302 as 
> double)),mode=Final,isDistinct=false)], 
> output=[havingCondition#439,item_sk#425,rank_col#426])
>    :  :                 :           +- INPUT
>    :  :                 +- Exchange hashpartitioning(ss_item_sk#282,200), None
>    :  :                    +- WholeStageCodegen
>    :  :                       :  +- TungstenAggregate(key=[ss_item_sk#282], 
> functions=[(avg(cast(ss_net_profit#302 as 
> double)),mode=Partial,isDistinct=false)], 
> output=[ss_item_sk#282,sum#506,count#507L])
>    :  :                       :     +- Project 
> [ss_item_sk#282,ss_net_profit#302]
>    :  :                       :        +- Filter (ss_store_sk#287 = 4)
>    :  :                       :           +- INPUT
>    :  :                       +- Scan 
> ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: 
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
>  PushedFilters: [EqualTo(ss_store_sk,4)]
>    :  +- Sort [rnk#432 ASC], false, 0
>    :     +- WholeStageCodegen
>    :        :  +- Project [item_sk#429,rnk#432]
>    :        :     +- Filter (rnk#432 < 11)
>    :        :        +- INPUT
>    :        +- Window [item_sk#429,rank_col#430], [rank(rank_col#430) 
> windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN UNBOUNDED PRECEDING AND 
> CURRENT ROW) AS rnk#432], [rank_col#430 DESC]
>    :           +- Sort [rank_col#430 DESC], false, 0
>    :              +- Exchange SinglePartition, None
>    :                 +- WholeStageCodegen
>    :                    :  +- Project [item_sk#429,rank_col#430]
>    :                    :     +- Filter havingCondition#440: boolean
>    :                    :        +- TungstenAggregate(key=[ss_item_sk#282], 
> functions=[(avg(cast(ss_net_profit#302 as 
> double)),mode=Final,isDistinct=false)], 
> output=[havingCondition#440,item_sk#429,rank_col#430])
>    :                    :           +- INPUT
>    :                    +- Exchange hashpartitioning(ss_item_sk#282,200), None
>    :                       +- WholeStageCodegen
>    :                          :  +- TungstenAggregate(key=[ss_item_sk#282], 
> functions=[(avg(cast(ss_net_profit#302 as 
> double)),mode=Partial,isDistinct=false)], 
> output=[ss_item_sk#282,sum#511,count#512L])
>    :                          :     +- Project 
> [ss_item_sk#282,ss_net_profit#302]
>    :                          :        +- Filter (ss_store_sk#287 = 4)
>    :                          :           +- INPUT
>    :                          +- Scan 
> ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: 
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet,
>  PushedFilters: [EqualTo(ss_store_sk,4)]
>    :- Scan ParquetRelation[i_item_sk#181,i_product_name#202] InputPaths: 
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet
>    +- Scan ParquetRelation[i_item_sk#457,i_product_name#478] InputPaths: 
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet,
>  
> hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to