[ https://issues.apache.org/jira/browse/IMPALA-7049?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16481320#comment-16481320 ]
Tim Armstrong commented on IMPALA-7049: --------------------------------------- Attached a profile from an affected query. It doesn't really make sense to me - the plan looks like it's from before the below commit, but that's the commit that added the error message: {noformat} commit fb5dc9eb484e54cf9f37d06168392c5bc2a0f4fe Author: Tim Armstrong <tarmstr...@cloudera.com> Date: Sun Oct 29 12:38:47 2017 -0700 IMPALA-4835: switch I/O buffers to buffer pool {noformat} > Scan node reservation calculation seems off > ------------------------------------------- > > Key: IMPALA-7049 > URL: https://issues.apache.org/jira/browse/IMPALA-7049 > Project: IMPALA > Issue Type: Bug > Components: Backend > Affects Versions: Impala 2.13.0, Impala 3.1.0 > Reporter: Michael Ho > Assignee: Tim Armstrong > Priority: Critical > Attachments: profile.txt > > > Running the query TPC-DS Q77a with a memory limit, we ran into the error > *HDFS scan min reservation 0 must be >= min buffer size 8192*: > {noformat} > Query Type: QUERY > Query State: EXCEPTION > Query Status: HDFS scan min reservation 0 must be >= min buffer size 8192 > Sql Statement: /* Mem: 2375 MB. Coordinator: machine. */ > -- RESULT MISMATCH FROM ORIGINAL > -- FIXED. TAKE ACTUAL RESULT AS EXPECTED > with ss as > (select s_store_sk, > sum(ss_ext_sales_price) as sales, > sum(ss_net_profit) as profit > from store_sales, > date_dim, > store > where ss_sold_date_sk = d_date_sk > and cast(d_date as timestamp) between cast('2000-08-23' as timestamp) > and (cast('2000-08-23' as timestamp) + interval 30 days) > and ss_store_sk = s_store_sk > group by s_store_sk) > , > sr as > (select s_store_sk, > sum(sr_return_amt) as return_amt, > sum(sr_net_loss) as profit_loss > from store_returns, > date_dim, > store > where sr_returned_date_sk = d_date_sk > and cast(d_date as timestamp) between cast('2000-08-23' as timestamp) > and (cast('2000-08-23' as timestamp) + interval 30 days) > and sr_store_sk = s_store_sk > group by s_store_sk), > cs as > (select cs_call_center_sk, > sum(cs_ext_sales_price) as sales, > sum(cs_net_profit) as profit > from catalog_sales, > date_dim > where cs_sold_date_sk = d_date_sk > and cast(d_date as timestamp) between cast('2000-08-23' as timestamp) > and (cast('2000-08-23' as timestamp) + interval 30 days) > group by cs_call_center_sk > ), > cr as > (select cr_call_center_sk, > sum(cr_return_amount) as return_amt, > sum(cr_net_loss) as profit_loss > from catalog_returns, > date_dim > where cr_returned_date_sk = d_date_sk > and cast(d_date as timestamp) between cast('2000-08-23' as timestamp) > and (cast('2000-08-23' as timestamp) + interval 30 days) > group by cr_call_center_sk > ), > ws as > ( select wp_web_page_sk, > sum(ws_ext_sales_price) as sales, > sum(ws_net_profit) as profit > from web_sales, > date_dim, > web_page > where ws_sold_date_sk = d_date_sk > and cast(d_date as timestamp) between cast('2000-08-23' as timestamp) > and (cast('2000-08-23' as timestamp) + interval 30 days) > and ws_web_page_sk = wp_web_page_sk > group by wp_web_page_sk), > wr as > (select wp_web_page_sk, > sum(wr_return_amt) as return_amt, > sum(wr_net_loss) as profit_loss > from web_returns, > date_dim, > web_page > where wr_returned_date_sk = d_date_sk > and cast(d_date as timestamp) between cast('2000-08-23' as timestamp) > and (cast('2000-08-23' as timestamp) + interval 30 days) > and wr_web_page_sk = wp_web_page_sk > group by wp_web_page_sk) > , > results as > (select channel > , id > , sum(sales) as sales > , sum(return_amt) as return_amt > , sum(profit) as profit > from > (select 'store channel' as channel > , ss.s_store_sk as id > , sales > , coalesce(return_amt, 0) as return_amt > , (profit - coalesce(profit_loss,0)) as profit > from ss left join sr > on ss.s_store_sk = sr.s_store_sk > union all > select 'catalog channel' as channel > , cs_call_center_sk as id > , sales > , return_amt > , (profit - profit_loss) as profit > from cs > , cr > union all > select 'web channel' as channel > , ws.wp_web_page_sk as id > , sales > , coalesce(return_amt, 0) return_amt > , (profit - coalesce(profit_loss,0)) as profit > from ws left join wr > on ws.wp_web_page_sk = wr.wp_web_page_sk > ) x > group by channel, id ) > select * > from ( > select channel, id, sales, return_amt, profit from results > union > select channel, NULL AS id, sum(sales) as sales, sum(return_amt) as > return_amt, sum(profit) as profit from results group by channel > union > select NULL AS channel, NULL AS id, sum(sales) as sales, sum(return_amt) as > return_amt, sum(profit) as profit from results > ) foo > order by channel, id > limit 100; > Coordinator: machine > Query Options (set by configuration): > ABORT_ON_ERROR=1,MEM_LIMIT=2490368000 > Query Options (set by configuration and planner): > ABORT_ON_ERROR=1,MEM_LIMIT=2490368000,MT_DOP=0 > Plan: > {noformat} > According to the code, the reservation for the scan node is supposed to be > computed correctly in the FE but this doesn't appear to be the case > {noformat} > // Check if reservation was enough to allocate at least one buffer. The > // reservation calculation in HdfsScanNode.java should guarantee this. > // Hitting this error indicates a misconfiguration or bug. > int64_t min_buffer_size = > ExecEnv::GetInstance()->disk_io_mgr()->min_buffer_size(); > if (scan_range_params_->size() > 0 > && resource_profile_.min_reservation < min_buffer_size) { > return Status(TErrorCode::INTERNAL_ERROR, > Substitute("HDFS scan min reservation $0 must be >= min buffer size $1", > resource_profile_.min_reservation, min_buffer_size)); > } > {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org For additional commands, e-mail: issues-all-h...@impala.apache.org