[ https://issues.apache.org/jira/browse/SPARK-14616?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
JESSE CHEN closed SPARK-14616. ------------------------------ Resolution: Not A Problem > TreeNodeException running Q44 and 58 on Parquet tables > ------------------------------------------------------ > > Key: SPARK-14616 > URL: https://issues.apache.org/jira/browse/SPARK-14616 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.0.0 > Reporter: JESSE CHEN > > {code:title=tpcds q44} > select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name > worst_performing > from(select * > from (select item_sk,rank() over (order by rank_col asc) rnk > from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col > from store_sales ss1 > where ss_store_sk = 4 > group by ss_item_sk > having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) > rank_col > from store_sales > where ss_store_sk = 4 > and ss_addr_sk is null > group by ss_store_sk))V1)V11 > where rnk < 11) asceding, > (select * > from (select item_sk,rank() over (order by rank_col desc) rnk > from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col > from store_sales ss1 > where ss_store_sk = 4 > group by ss_item_sk > having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) > rank_col > from store_sales > where ss_store_sk = 4 > and ss_addr_sk is null > group by ss_store_sk))V2)V21 > where rnk < 11) descending, > item i1, > item i2 > where asceding.rnk = descending.rnk > and i1.i_item_sk=asceding.item_sk > and i2.i_item_sk=descending.item_sk > order by asceding.rnk > limit 100; > {code} > {noformat} > bin/spark-sql --driver-memory 10g --verbose --master yarn-client --packages > com.databricks:spark-csv_2.10:1.3.0 --executor-memory 4g --num-executors 80 > --executor-cores 2 --database hadoopds1g -f q44.sql > {noformat} > {noformat} > org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: > Exchange SinglePartition, None > +- WholeStageCodegen > : +- Project [item_sk#0,rank_col#1] > : +- Filter havingCondition#219: boolean > : +- TungstenAggregate(key=[ss_item_sk#12], > functions=[(avg(ss_net_profit#32),mode=Final,isDistinct=false)], > output=[havingCondition#219,item_sk#0,rank_col#1]) > : +- INPUT > +- Exchange hashpartitioning(ss_item_sk#12,200), None > +- WholeStageCodegen > : +- TungstenAggregate(key=[ss_item_sk#12], > functions=[(avg(ss_net_profit#32),mode=Partial,isDistinct=false)], > output=[ss_item_sk#12,sum#612,count#613L]) > : +- Project [ss_item_sk#12,ss_net_profit#32] > : +- Filter (ss_store_sk#17 = 4) > : +- INPUT > +- Scan ParquetRelation: > hadoopds1g.store_sales[ss_item_sk#12,ss_net_profit#32,ss_store_sk#17] > InputPaths: > hdfs://bigaperf116.svl.ibm.com:8020/apps/hive/warehouse/hadoopds1g.db/store_sales, > PushedFilters: [EqualTo(ss_store_sk,4)] > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47) > at > org.apache.spark.sql.execution.Exchange.doExecute(Exchange.scala:105) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at org.apache.spark.sql.execution.Window.doExecute(Window.scala:288) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at > org.apache.spark.sql.execution.InputAdapter.upstream(WholeStageCodegen.scala:176) > at > org.apache.spark.sql.execution.Filter.upstream(basicOperators.scala:73) > at > org.apache.spark.sql.execution.Project.upstream(basicOperators.scala:35) > at > org.apache.spark.sql.execution.WholeStageCodegen.doExecute(WholeStageCodegen.scala:279) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at org.apache.spark.sql.execution.Sort.doExecute(Sort.scala:60) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:116) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > at > org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116) > at > org.apache.spark.sql.execution.joins.SortMergeJoin.doExecute(SortMergeJoin.scala:63) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118) > {noformat} > Note this happens only when using parquet tables. Flat file (csv) works fine > and returns correct query results. > Additionally, query 58 also encounters the same exception. The query is as > follows: > {noformat} > with ss_items as > (select i_item_id item_id > ,sum(ss_ext_sales_price) ss_item_rev > from store_sales > ,item > ,date_dim > where ss_item_sk = i_item_sk > and d_date in (select d_date > from date_dim > where d_week_seq = (select d_week_seq > from date_dim > where d_date = '2000-01-03')) > and ss_sold_date_sk = d_date_sk > group by i_item_id), > cs_items as > (select i_item_id item_id > ,sum(cs_ext_sales_price) cs_item_rev > from catalog_sales > ,item > ,date_dim > where cs_item_sk = i_item_sk > and d_date in (select d_date > from date_dim > where d_week_seq = (select d_week_seq > from date_dim > where d_date = '2000-01-03')) > and cs_sold_date_sk = d_date_sk > group by i_item_id), > ws_items as > (select i_item_id item_id > ,sum(ws_ext_sales_price) ws_item_rev > from web_sales > ,item > ,date_dim > where ws_item_sk = i_item_sk > and d_date in (select d_date > from date_dim > where d_week_seq =(select d_week_seq > from date_dim > where d_date = '2000-01-03')) > and ws_sold_date_sk = d_date_sk > group by i_item_id) > select ss_items.item_id > ,ss_item_rev > ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev > ,cs_item_rev > ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev > ,ws_item_rev > ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev > ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average > from ss_items,cs_items,ws_items > where ss_items.item_id=cs_items.item_id > and ss_items.item_id=ws_items.item_id > and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev > and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev > and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev > and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev > and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev > and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev > order by item_id > ,ss_item_rev > limit 100; > {noformat} > Not certain if related. But seems a case a bad plan was created. > Here is are logical, optimized and physical plans created for query 44: > {noformat} > Logical Plan =='Limit 100 > +- 'Sort ['asceding.rnk ASC], true > +- 'Project > [unresolvedalias('asceding.rnk,None),unresolvedalias('i1.i_product_name AS > best_performing#433,None),unresolvedalias('i2.i_product_name AS > worst_performing#434,None)] > +- 'Filter ((('asceding.rnk = 'descending.rnk) && ('i1.i_item_sk = > 'asceding.item_sk)) && ('i2.i_item_sk = 'descending.item_sk)) > +- 'Join Inner, None > :- 'Join Inner, None > : :- 'Join Inner, None > : : :- 'Subquery asceding > : : : +- 'Project [unresolvedalias(*,None)] > : : : +- 'Filter ('rnk < 11) > : : : +- 'Subquery V11 > : : : +- 'Project > [unresolvedalias('item_sk,None),unresolvedalias('rank() > windowspecdefinition('rank_col ASC,UnspecifiedFrame) AS rnk#428,None)] > : : : +- 'Subquery V1 > : : : +- 'Filter cast(('avg('ss_net_profit) > > (0.9 * scalarsubquery('Aggregate ['ss_store_sk], > [unresolvedalias('avg('ss_net_profit) AS rank_col#427,None)] > +- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk)) > +- 'UnresolvedRelation `store_sales`, None > ))) as boolean) > : : : +- 'Aggregate ['ss_item_sk], > [unresolvedalias('ss_item_sk AS > item_sk#425,None),unresolvedalias('avg('ss_net_profit) AS rank_col#426,None)] > : : : +- 'Filter ('ss_store_sk = 4) > : : : +- 'UnresolvedRelation > `store_sales`, Some(ss1) > : : +- 'Subquery descending > : : +- 'Project [unresolvedalias(*,None)] > : : +- 'Filter ('rnk < 11) > : : +- 'Subquery V21 > : : +- 'Project > [unresolvedalias('item_sk,None),unresolvedalias('rank() > windowspecdefinition('rank_col DESC,UnspecifiedFrame) AS rnk#432,None)] > : : +- 'Subquery V2 > : : +- 'Filter cast(('avg('ss_net_profit) > > (0.9 * scalarsubquery('Aggregate ['ss_store_sk], > [unresolvedalias('avg('ss_net_profit) AS rank_col#431,None)] > +- 'Filter (('ss_store_sk = 4) && isnull('ss_addr_sk)) > +- 'UnresolvedRelation `store_sales`, None > ))) as boolean) > : : +- 'Aggregate ['ss_item_sk], > [unresolvedalias('ss_item_sk AS > item_sk#429,None),unresolvedalias('avg('ss_net_profit) AS rank_col#430,None)] > : : +- 'Filter ('ss_store_sk = 4) > : : +- 'UnresolvedRelation > `store_sales`, Some(ss1) > : +- 'UnresolvedRelation `item`, Some(i1) > +- 'UnresolvedRelation `item`, Some(i2) > == > Analyzed Plan ==Limit 100 > +- Sort [rnk#428 ASC], true > +- Project [rnk#428,i_product_name#202 AS > best_performing#433,i_product_name#478 AS worst_performing#434] > +- Filter (((rnk#428 = rnk#432) && (i_item_sk#181 = item_sk#425)) && > (i_item_sk#457 = item_sk#429)) > +- Join Inner, None > :- Join Inner, None > : :- Join Inner, None > : : :- Subquery asceding > : : : +- Project [item_sk#425,rnk#428] > : : : +- Filter (rnk#428 < 11) > : : : +- Subquery V11 > : : : +- Project [item_sk#425,rnk#428] > : : : +- Project > [item_sk#425,rank_col#426,rnk#428,rnk#428] > : : : +- Window [item_sk#425,rank_col#426], > [rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC] > : : : +- Project [item_sk#425,rank_col#426] > : : : +- Subquery V1 > : : : +- Project > [item_sk#425,rank_col#426] > : : : +- Filter > havingCondition#439: boolean > : : : +- Aggregate > [ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * > scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) AS rank_col#427] > +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286)) > +- Subquery store_sales > +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > ))) as boolean) AS havingCondition#439,ss_item_sk#282 AS > item_sk#425,(avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) AS rank_col#426] > : : : +- Filter > (ss_store_sk#287 = 4) > : : : +- Subquery ss1 > : : : +- Subquery > store_sales > : : : +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > : : +- Subquery descending > : : +- Project [item_sk#429,rnk#432] > : : +- Filter (rnk#432 < 11) > : : +- Subquery V21 > : : +- Project [item_sk#429,rnk#432] > : : +- Project > [item_sk#429,rank_col#430,rnk#432,rnk#432] > : : +- Window [item_sk#429,rank_col#430], > [rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC] > : : +- Project [item_sk#429,rank_col#430] > : : +- Subquery V2 > : : +- Project > [item_sk#429,rank_col#430] > : : +- Filter > havingCondition#440: boolean > : : +- Aggregate > [ss_item_sk#282], [cast(((avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) > (cast(0.9 as double) * > scalarsubquery(Aggregate [ss_store_sk#287], [(avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) AS rank_col#431] > +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286)) > +- Subquery store_sales > +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > ))) as boolean) AS havingCondition#440,ss_item_sk#282 AS > item_sk#429,(avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) AS rank_col#430] > : : +- Filter > (ss_store_sk#287 = 4) > : : +- Subquery ss1 > : : +- Subquery > store_sales > : : +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > : +- Subquery i1 > : +- Subquery item > : +- > Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202] > ParquetRelation > +- Subquery i2 > +- Subquery item > +- > Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478] > ParquetRelation > == > Optimized Plan ==Limit 100 > +- Sort [rnk#428 ASC], true > +- Project [rnk#428,i_product_name#202 AS > best_performing#433,i_product_name#478 AS worst_performing#434] > +- Join Inner, Some((i_item_sk#457 = item_sk#429)) > :- Project [rnk#428,item_sk#429,i_product_name#202] > : +- Join Inner, Some((i_item_sk#181 = item_sk#425)) > : :- Project [rnk#428,item_sk#429,item_sk#425] > : : +- Join Inner, Some((rnk#428 = rnk#432)) > : : :- Project [item_sk#425,rnk#428] > : : : +- Filter (rnk#428 < 11) > : : : +- Window [item_sk#425,rank_col#426], > [rank(rank_col#426) windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#428], [rank_col#426 ASC] > : : : +- Project [item_sk#425,rank_col#426] > : : : +- Filter havingCondition#439: boolean > : : : +- Aggregate [ss_item_sk#282], > [((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > > (0.9 * scalarsubquery(Aggregate [ss_store_sk#287], > [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS > rank_col#427] > +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286)) > +- Subquery store_sales > +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > ))) AS havingCondition#439,ss_item_sk#282 AS > item_sk#425,(avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) AS rank_col#426] > : : : +- Project > [ss_item_sk#282,ss_net_profit#302] > : : : +- Filter (ss_store_sk#287 = 4) > : : : +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > : : +- Project [item_sk#429,rnk#432] > : : +- Filter (rnk#432 < 11) > : : +- Window [item_sk#429,rank_col#430], > [rank(rank_col#430) windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk#432], [rank_col#430 DESC] > : : +- Project [item_sk#429,rank_col#430] > : : +- Filter havingCondition#440: boolean > : : +- Aggregate [ss_item_sk#282], > [((avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) > > (0.9 * scalarsubquery(Aggregate [ss_store_sk#287], > [(avg(cast(ss_net_profit#302 as double)),mode=Complete,isDistinct=false) AS > rank_col#431] > +- Filter ((ss_store_sk#287 = 4) && isnull(ss_addr_sk#286)) > +- Subquery store_sales > +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > ))) AS havingCondition#440,ss_item_sk#282 AS > item_sk#429,(avg(cast(ss_net_profit#302 as > double)),mode=Complete,isDistinct=false) AS rank_col#430] > : : +- Project > [ss_item_sk#282,ss_net_profit#302] > : : +- Filter (ss_store_sk#287 = 4) > : : +- > Relation[ss_sold_date_sk#280,ss_sold_time_sk#281,ss_item_sk#282,ss_customer_sk#283,ss_cdemo_sk#284,ss_hdemo_sk#285,ss_addr_sk#286,ss_store_sk#287,ss_promo_sk#288,ss_ticket_number#289L,ss_quantity#290L,ss_wholesale_cost#291,ss_list_price#292,ss_sales_price#293,ss_ext_discount_amt#294,ss_ext_sales_price#295,ss_ext_wholesale_cost#296,ss_ext_list_price#297,ss_ext_tax#298,ss_coupon_amt#299,ss_net_paid#300,ss_net_paid_inc_tax#301,ss_net_profit#302] > ParquetRelation > : +- Project [i_item_sk#181,i_product_name#202] > : +- > Relation[i_item_sk#181,i_item_id#182,i_rec_start_date#183,i_rec_end_date#184,i_item_desc#185,i_current_price#186,i_wholesale_cost#187,i_brand_id#188L,i_brand#189,i_class_id#190L,i_class#191,i_category_id#192L,i_category#193,i_manufact_id#194L,i_manufact#195,i_size#196,i_formulation#197,i_color#198,i_units#199,i_container#200,i_manager_id#201L,i_product_name#202] > ParquetRelation > +- Project [i_item_sk#457,i_product_name#478] > +- > Relation[i_item_sk#457,i_item_id#458,i_rec_start_date#459,i_rec_end_date#460,i_item_desc#461,i_current_price#462,i_wholesale_cost#463,i_brand_id#464L,i_brand#465,i_class_id#466L,i_class#467,i_category_id#468L,i_category#469,i_manufact_id#470L,i_manufact#471,i_size#472,i_formulation#473,i_color#474,i_units#475,i_container#476,i_manager_id#477L,i_product_name#478] > ParquetRelation > == > Physical Plan ==TakeOrderedAndProject(limit=100, orderBy=[rnk#428 ASC], > output=[rnk#428,best_performing#433,worst_performing#434]) > +- WholeStageCodegen > : +- Project [rnk#428,i_product_name#202 AS > best_performing#433,i_product_name#478 AS worst_performing#434] > : +- BroadcastHashJoin [item_sk#429], [i_item_sk#457], BuildRight, None > : :- Project [rnk#428,item_sk#429,i_product_name#202] > : : +- BroadcastHashJoin [item_sk#425], [i_item_sk#181], > BuildRight, None > : : :- Project [rnk#428,item_sk#429,item_sk#425] > : : : +- INPUT > : : +- INPUT > : +- INPUT > :- SortMergeJoin [rnk#428], [rnk#432], None > : :- Sort [rnk#428 ASC], false, 0 > : : +- WholeStageCodegen > : : : +- Project [item_sk#425,rnk#428] > : : : +- Filter (rnk#428 < 11) > : : : +- INPUT > : : +- Window [item_sk#425,rank_col#426], [rank(rank_col#426) > windowspecdefinition(rank_col#426 ASC,ROWS BETWEEN UNBOUNDED PRECEDING AND > CURRENT ROW) AS rnk#428], [rank_col#426 ASC] > : : +- Sort [rank_col#426 ASC], false, 0 > : : +- Exchange SinglePartition, None > : : +- WholeStageCodegen > : : : +- Project [item_sk#425,rank_col#426] > : : : +- Filter havingCondition#439: boolean > : : : +- TungstenAggregate(key=[ss_item_sk#282], > functions=[(avg(cast(ss_net_profit#302 as > double)),mode=Final,isDistinct=false)], > output=[havingCondition#439,item_sk#425,rank_col#426]) > : : : +- INPUT > : : +- Exchange hashpartitioning(ss_item_sk#282,200), None > : : +- WholeStageCodegen > : : : +- TungstenAggregate(key=[ss_item_sk#282], > functions=[(avg(cast(ss_net_profit#302 as > double)),mode=Partial,isDistinct=false)], > output=[ss_item_sk#282,sum#506,count#507L]) > : : : +- Project > [ss_item_sk#282,ss_net_profit#302] > : : : +- Filter (ss_store_sk#287 = 4) > : : : +- INPUT > : : +- Scan > ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, > PushedFilters: [EqualTo(ss_store_sk,4)] > : +- Sort [rnk#432 ASC], false, 0 > : +- WholeStageCodegen > : : +- Project [item_sk#429,rnk#432] > : : +- Filter (rnk#432 < 11) > : : +- INPUT > : +- Window [item_sk#429,rank_col#430], [rank(rank_col#430) > windowspecdefinition(rank_col#430 DESC,ROWS BETWEEN UNBOUNDED PRECEDING AND > CURRENT ROW) AS rnk#432], [rank_col#430 DESC] > : +- Sort [rank_col#430 DESC], false, 0 > : +- Exchange SinglePartition, None > : +- WholeStageCodegen > : : +- Project [item_sk#429,rank_col#430] > : : +- Filter havingCondition#440: boolean > : : +- TungstenAggregate(key=[ss_item_sk#282], > functions=[(avg(cast(ss_net_profit#302 as > double)),mode=Final,isDistinct=false)], > output=[havingCondition#440,item_sk#429,rank_col#430]) > : : +- INPUT > : +- Exchange hashpartitioning(ss_item_sk#282,200), None > : +- WholeStageCodegen > : : +- TungstenAggregate(key=[ss_item_sk#282], > functions=[(avg(cast(ss_net_profit#302 as > double)),mode=Partial,isDistinct=false)], > output=[ss_item_sk#282,sum#511,count#512L]) > : : +- Project > [ss_item_sk#282,ss_net_profit#302] > : : +- Filter (ss_store_sk#287 = 4) > : : +- INPUT > : +- Scan > ParquetRelation[ss_item_sk#282,ss_net_profit#302,ss_store_sk#287] InputPaths: > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_SUCCESS, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_common_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00000-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00001-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/store_sales/part-r-00002-2245f9f3-e00c-4c6c-b9cd-62179efcb22c.gz.parquet, > PushedFilters: [EqualTo(ss_store_sk,4)] > :- Scan ParquetRelation[i_item_sk#181,i_product_name#202] InputPaths: > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet > +- Scan ParquetRelation[i_item_sk#457,i_product_name#478] InputPaths: > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_SUCCESS, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_common_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/_metadata, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00000-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet, > > hdfs://bigaperf116.svl.ibm.com:8020/user/spark/hadoopds1gbparquet/item/part-r-00001-09ab6229-4b98-42a8-988e-2f1706436007.gz.parquet > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org