[ 
https://issues.apache.org/jira/browse/IMPALA-7049?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16481320#comment-16481320
 ] 

Tim Armstrong commented on IMPALA-7049:
---------------------------------------

Attached a profile from an affected query. It doesn't really make sense to me - 
the plan looks like it's from before the below commit, but that's the commit 
that added the error message:

{noformat}

commit fb5dc9eb484e54cf9f37d06168392c5bc2a0f4fe
Author: Tim Armstrong <tarmstr...@cloudera.com>
Date:   Sun Oct 29 12:38:47 2017 -0700

    IMPALA-4835: switch I/O buffers to buffer pool
{noformat}

> Scan node reservation calculation seems off
> -------------------------------------------
>
>                 Key: IMPALA-7049
>                 URL: https://issues.apache.org/jira/browse/IMPALA-7049
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>    Affects Versions: Impala 2.13.0, Impala 3.1.0
>            Reporter: Michael Ho
>            Assignee: Tim Armstrong
>            Priority: Critical
>         Attachments: profile.txt
>
>
> Running the query TPC-DS Q77a with a memory limit, we ran into the error 
> *HDFS scan min reservation 0 must be >= min buffer size 8192*:
> {noformat}
>     Query Type: QUERY
>     Query State: EXCEPTION
>     Query Status: HDFS scan min reservation 0 must be >= min buffer size 8192
>     Sql Statement: /* Mem: 2375 MB. Coordinator: machine. */
> -- RESULT MISMATCH FROM ORIGINAL
> -- FIXED. TAKE ACTUAL RESULT AS EXPECTED
> with ss as
>  (select s_store_sk,
>          sum(ss_ext_sales_price) as sales,
>          sum(ss_net_profit) as profit
>  from store_sales,
>       date_dim,
>       store
>  where ss_sold_date_sk = d_date_sk
>        and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
>                   and (cast('2000-08-23' as timestamp) + interval 30 days)
>        and ss_store_sk = s_store_sk
>  group by s_store_sk)
>  ,
>  sr as
>  (select s_store_sk,
>          sum(sr_return_amt) as return_amt,
>          sum(sr_net_loss) as profit_loss
>  from store_returns,
>       date_dim,
>       store
>  where sr_returned_date_sk = d_date_sk
>        and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
>                   and (cast('2000-08-23' as timestamp) + interval 30 days)
>        and sr_store_sk = s_store_sk
>  group by s_store_sk),
>  cs as
>  (select cs_call_center_sk,
>         sum(cs_ext_sales_price) as sales,
>         sum(cs_net_profit) as profit
>  from catalog_sales,
>       date_dim
>  where cs_sold_date_sk = d_date_sk
>        and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
>                   and (cast('2000-08-23' as timestamp) + interval 30 days)
>  group by cs_call_center_sk
>  ),
>  cr as
>  (select cr_call_center_sk,
>          sum(cr_return_amount) as return_amt,
>          sum(cr_net_loss) as profit_loss
>  from catalog_returns,
>       date_dim
>  where cr_returned_date_sk = d_date_sk
>        and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
>                   and (cast('2000-08-23' as timestamp) + interval 30 days)
>  group by cr_call_center_sk
>  ),
>  ws as
>  ( select wp_web_page_sk,
>         sum(ws_ext_sales_price) as sales,
>         sum(ws_net_profit) as profit
>  from web_sales,
>       date_dim,
>       web_page
>  where ws_sold_date_sk = d_date_sk
>        and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
>                   and (cast('2000-08-23' as timestamp) + interval 30 days)
>        and ws_web_page_sk = wp_web_page_sk
>  group by wp_web_page_sk),
>  wr as
>  (select wp_web_page_sk,
>         sum(wr_return_amt) as return_amt,
>         sum(wr_net_loss) as profit_loss
>  from web_returns,
>       date_dim,
>       web_page
>  where wr_returned_date_sk = d_date_sk
>        and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
>                   and (cast('2000-08-23' as timestamp) + interval 30 days)
>        and wr_web_page_sk = wp_web_page_sk
>  group by wp_web_page_sk)
>  ,
>  results as
>  (select channel
>         , id
>         , sum(sales) as sales
>         , sum(return_amt) as return_amt
>         , sum(profit) as profit
>  from
>  (select 'store channel' as channel
>         , ss.s_store_sk as id
>         , sales
>         , coalesce(return_amt, 0) as return_amt
>         , (profit - coalesce(profit_loss,0)) as profit
>  from   ss left join sr
>         on  ss.s_store_sk = sr.s_store_sk
>  union all
>  select 'catalog channel' as channel
>         , cs_call_center_sk as id
>         , sales
>         , return_amt
>         , (profit - profit_loss) as profit
>  from  cs
>        , cr
>  union all
>  select 'web channel' as channel
>         , ws.wp_web_page_sk as id
>         , sales
>         , coalesce(return_amt, 0) return_amt
>         , (profit - coalesce(profit_loss,0)) as profit
>  from   ws left join wr
>         on  ws.wp_web_page_sk = wr.wp_web_page_sk
>  ) x
>  group by channel, id )
>   select  *
>  from (
>  select channel, id, sales, return_amt, profit from  results
>  union
>  select channel, NULL AS id, sum(sales) as sales, sum(return_amt) as 
> return_amt, sum(profit) as profit from  results group by channel
>  union
>  select NULL AS channel, NULL AS id, sum(sales) as sales, sum(return_amt) as 
> return_amt, sum(profit) as profit from  results
> ) foo
> order by channel, id
>  limit 100;
>     Coordinator: machine
>     Query Options (set by configuration): 
> ABORT_ON_ERROR=1,MEM_LIMIT=2490368000
>     Query Options (set by configuration and planner): 
> ABORT_ON_ERROR=1,MEM_LIMIT=2490368000,MT_DOP=0
>     Plan: 
>  {noformat}
> According to the code, the reservation for the scan node is supposed to be 
> computed correctly in the FE but this doesn't appear to be the case
> {noformat}
>   // Check if reservation was enough to allocate at least one buffer. The
>   // reservation calculation in HdfsScanNode.java should guarantee this.
>   // Hitting this error indicates a misconfiguration or bug.
>   int64_t min_buffer_size = 
> ExecEnv::GetInstance()->disk_io_mgr()->min_buffer_size();
>   if (scan_range_params_->size() > 0
>       && resource_profile_.min_reservation < min_buffer_size) {
>     return Status(TErrorCode::INTERNAL_ERROR,
>       Substitute("HDFS scan min reservation $0 must be >= min buffer size $1",
>        resource_profile_.min_reservation, min_buffer_size));
>   }
>  {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to