JESSE CHEN created SPARK-18458:
----------------------------------

             Summary: core dumped running Spark SQL on large data volume (100TB)
                 Key: SPARK-18458
                 URL: https://issues.apache.org/jira/browse/SPARK-18458
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 1.6.0
            Reporter: JESSE CHEN
             Fix For: 2.0.0


Testing Spark SQL using TPC queries. Query 49 returns wrong results compared to 
the official result set. This is at the 1GB scale factor (validation run).

Spark SQL returns the right rows but in the wrong order, even though the query 
has an 'order by' clause.

Actual results:
{noformat}
[store,9797,0.80000000000000000000,2,2]
[store,12641,0.81609195402298850575,3,3]
[store,6661,0.92207792207792207792,7,7]
[store,13013,0.94202898550724637681,8,8]
[store,9029,1.00000000000000000000,10,10]
[web,15597,0.66197183098591549296,3,3]
[store,14925,0.96470588235294117647,9,9]
[store,4063,1.00000000000000000000,10,10]
[catalog,8929,0.76250000000000000000,7,7]
[store,11589,0.82653061224489795918,6,6]
[store,1171,0.82417582417582417582,5,5]
[store,9471,0.77500000000000000000,1,1]
[catalog,12577,0.65591397849462365591,3,3]
[web,97,0.90361445783132530120,9,8]
[web,85,0.85714285714285714286,8,7]
[catalog,361,0.74647887323943661972,5,5]
[web,2915,0.69863013698630136986,4,4]
[web,117,0.92500000000000000000,10,9]
[catalog,9295,0.77894736842105263158,9,9]
[web,3305,0.73750000000000000000,6,16]
[catalog,16215,0.79069767441860465116,10,10]
[web,7539,0.59000000000000000000,1,1]
[catalog,17543,0.57142857142857142857,1,1]
[catalog,3411,0.71641791044776119403,4,4]
[web,11933,0.71717171717171717172,5,5]
[catalog,14513,0.63541666666666666667,2,2]
[store,15839,0.81632653061224489796,4,4]
[web,3337,0.62650602409638554217,2,2]
[web,5299,0.92708333333333333333,11,10]
[catalog,8189,0.74698795180722891566,6,6]
[catalog,14869,0.77173913043478260870,8,8]
[web,483,0.80000000000000000000,7,6]
{noformat}


Expected results:
{noformat}
+---------+-------+--------------------+-------------+---------------+
| CHANNEL |  ITEM |       RETURN_RATIO | RETURN_RANK | CURRENCY_RANK |
+---------+-------+--------------------+-------------+---------------+
| catalog | 17543 |  .5714285714285714 |           1 |             1 |
| catalog | 14513 |  .6354166666666666 |           2 |             2 |
| catalog | 12577 |  .6559139784946236 |           3 |             3 |
| catalog |  3411 |  .7164179104477611 |           4 |             4 |
| catalog |   361 |  .7464788732394366 |           5 |             5 |
| catalog |  8189 |  .7469879518072289 |           6 |             6 |
| catalog |  8929 |  .7625000000000000 |           7 |             7 |
| catalog | 14869 |  .7717391304347826 |           8 |             8 |
| catalog |  9295 |  .7789473684210526 |           9 |             9 |
| catalog | 16215 |  .7906976744186046 |          10 |            10 |
| store   |  9471 |  .7750000000000000 |           1 |             1 |
| store   |  9797 |  .8000000000000000 |           2 |             2 |
| store   | 12641 |  .8160919540229885 |           3 |             3 |
| store   | 15839 |  .8163265306122448 |           4 |             4 |
| store   |  1171 |  .8241758241758241 |           5 |             5 |
| store   | 11589 |  .8265306122448979 |           6 |             6 |
| store   |  6661 |  .9220779220779220 |           7 |             7 |
| store   | 13013 |  .9420289855072463 |           8 |             8 |
| store   | 14925 |  .9647058823529411 |           9 |             9 |
| store   |  4063 | 1.0000000000000000 |          10 |            10 |
| store   |  9029 | 1.0000000000000000 |          10 |            10 |
| web     |  7539 |  .5900000000000000 |           1 |             1 |
| web     |  3337 |  .6265060240963855 |           2 |             2 |
| web     | 15597 |  .6619718309859154 |           3 |             3 |
| web     |  2915 |  .6986301369863013 |           4 |             4 |
| web     | 11933 |  .7171717171717171 |           5 |             5 |
| web     |  3305 |  .7375000000000000 |           6 |            16 |
| web     |   483 |  .8000000000000000 |           7 |             6 |
| web     |    85 |  .8571428571428571 |           8 |             7 |
| web     |    97 |  .9036144578313253 |           9 |             8 |
| web     |   117 |  .9250000000000000 |          10 |             9 |
| web     |  5299 |  .9270833333333333 |          11 |            10 |
+---------+-------+--------------------+-------------+---------------+
{noformat}

Query used:
{noformat}
-- start query 49 in stream 0 using template query49.tpl and seed QUALIFICATION
-- Purpose: for each sales channel (web, catalog, store) compute, per item,
--   return_ratio   = summed returned quantity / summed sold quantity
--   currency_ratio = summed returned amount   / summed net paid
-- over sales whose sold date has d_year = 2001 and d_moy = 12, rank the items
-- of each channel by each ratio (ascending), keep the items whose rank is
-- <= 10 on either ratio, and combine the three channels into one result
-- sorted by channel, return_rank, currency_rank (at most 100 rows).
-- Output columns: channel, item, return_ratio, return_rank, currency_rank.

-- Branch 1: web channel.
  select  
 'web' as channel
 ,web.item
 ,web.return_ratio
 ,web.return_rank
 ,web.currency_rank
 from (
        -- Rank every web item by each ratio. No partition clause is used, so
        -- each rank() runs over all rows of in_web. currency_ratio is only
        -- needed for ranking and is not projected by the outer select.
        select 
         item
        ,return_ratio
        ,currency_ratio
        ,rank() over (order by return_ratio) as return_rank
        ,rank() over (order by currency_ratio) as currency_rank
        from
        -- in_web: one row per ws_item_sk with both ratios. NULL quantities and
        -- amounts are treated as 0 via coalesce; sums are cast to
        -- decimal(15,4) before dividing.
        (       select ws.ws_item_sk as item
                ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/
                cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as 
return_ratio
                ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/
                cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as 
currency_ratio
                from 
                 web_sales ws left outer join web_returns wr 
                        on (ws.ws_order_number = wr.wr_order_number and 
                        ws.ws_item_sk = wr.wr_item_sk)
                 ,date_dim
                -- NOTE: wr.wr_return_amt > 10000 tests a column from the
                -- nullable side of the left outer join, so sales rows without
                -- a matching return (wr columns NULL) fail it and are dropped.
                -- date_dim is comma-joined and tied to the sale through
                -- ws_sold_date_sk = d_date_sk below.
                where 
                        wr.wr_return_amt > 10000 
                        and ws.ws_net_profit > 1
                         and ws.ws_net_paid > 0
                         and ws.ws_quantity > 0
                         and ws_sold_date_sk = d_date_sk
                         and d_year = 2001
                         and d_moy = 12
                group by ws.ws_item_sk
        ) in_web
 ) web
 -- Keep an item if it is in the top 10 by either ranking; this is why a row
 -- can carry a rank above 10 in one of the two rank columns.
 where 
 (
 web.return_rank <= 10
 or
 web.currency_rank <= 10
 )
 -- union (not union all): duplicate rows in the combined result are removed.
 union  
 -- Branch 2: catalog channel; same shape as the web branch, reading
 -- catalog_sales / catalog_returns.
 select 
 'catalog' as channel
 ,catalog.item
 ,catalog.return_ratio
 ,catalog.return_rank
 ,catalog.currency_rank
 from (
        select 
         item
        ,return_ratio
        ,currency_ratio
        ,rank() over (order by return_ratio) as return_rank
        ,rank() over (order by currency_ratio) as currency_rank
        from
        -- in_cat: one row per cs_item_sk with both ratios.
        (       select 
                cs.cs_item_sk as item
                ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/
                cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as 
return_ratio
                ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/
                cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as 
currency_ratio
                from 
                catalog_sales cs left outer join catalog_returns cr
                        on (cs.cs_order_number = cr.cr_order_number and 
                        cs.cs_item_sk = cr.cr_item_sk)
                ,date_dim
                -- NOTE: the cr.cr_return_amount predicate is on the nullable
                -- side of the left outer join, so sales without a matching
                -- return are dropped here as well.
                where 
                        cr.cr_return_amount > 10000 
                        and cs.cs_net_profit > 1
                         and cs.cs_net_paid > 0
                         and cs.cs_quantity > 0
                         and cs_sold_date_sk = d_date_sk
                         and d_year = 2001
                         and d_moy = 12
                 group by cs.cs_item_sk
        ) in_cat
 ) catalog
 where 
 (
 catalog.return_rank <= 10
 or
 catalog.currency_rank <=10
 )
 union 
 -- Branch 3: store channel; same shape again, reading store_sales /
 -- store_returns (joined on ticket number and item).
 select 
 'store' as channel
 ,store.item
 ,store.return_ratio
 ,store.return_rank
 ,store.currency_rank
 from (
        select 
         item
        ,return_ratio
        ,currency_ratio
        ,rank() over (order by return_ratio) as return_rank
        ,rank() over (order by currency_ratio) as currency_rank
        from
        -- in_store: one row per ss_item_sk with both ratios.
        (       select sts.ss_item_sk as item
                ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as 
decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as 
return_ratio
                ,(cast(sum(coalesce(sr.sr_return_amt,0)) as 
decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as 
currency_ratio
                from 
                store_sales sts left outer join store_returns sr
                        on (sts.ss_ticket_number = sr.sr_ticket_number and 
sts.ss_item_sk = sr.sr_item_sk)
                ,date_dim
                -- NOTE: the sr.sr_return_amt predicate is on the nullable
                -- side of the left outer join, so sales without a matching
                -- return are dropped here as well.
                where 
                        sr.sr_return_amt > 10000 
                        and sts.ss_net_profit > 1
                         and sts.ss_net_paid > 0 
                         and sts.ss_quantity > 0
                         and ss_sold_date_sk = d_date_sk
                         and d_year = 2001 
                         and d_moy = 12
                group by sts.ss_item_sk
        ) in_store
 ) store
 where  (
 store.return_rank <= 10
 or 
 store.currency_rank <= 10
 )
 -- Positional sort keys over the combined result: 1 = channel,
 -- 4 = return_rank, 5 = currency_rank. Rows that tie on all three have no
 -- further tie-breaker, so their relative order is not fixed by this query.
 order by 1,4,5
   limit 100;
-- end query 49 in stream 0 using template query49.tpl
{noformat}




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to