[
https://issues.apache.org/jira/browse/IMPALA-7049?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Michael Ho resolved IMPALA-7049.
--------------------------------
Resolution: Not A Bug
Fix Version/s: Not Applicable
Turns out that the new build wasn't deployed correctly. Sorry for the confusion.
> Scan node reservation calculation seems off
> -------------------------------------------
>
> Key: IMPALA-7049
> URL: https://issues.apache.org/jira/browse/IMPALA-7049
> Project: IMPALA
> Issue Type: Bug
> Components: Backend
> Affects Versions: Impala 2.13.0, Impala 3.1.0
> Reporter: Michael Ho
> Assignee: Tim Armstrong
> Priority: Critical
> Fix For: Not Applicable
>
> Attachments: profile.txt
>
>
> Running the query TPC-DS Q77a with a memory limit, we ran into the error
> *HDFS scan min reservation 0 must be >= min buffer size 8192*:
> {noformat}
> Query Type: QUERY
> Query State: EXCEPTION
> Query Status: HDFS scan min reservation 0 must be >= min buffer size 8192
> Sql Statement: /* Mem: 2375 MB. Coordinator: machine. */
> -- RESULT MISMATCH FROM ORIGINAL
> -- FIXED. TAKE ACTUAL RESULT AS EXPECTED
> with ss as
> (select s_store_sk,
> sum(ss_ext_sales_price) as sales,
> sum(ss_net_profit) as profit
> from store_sales,
> date_dim,
> store
> where ss_sold_date_sk = d_date_sk
> and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
> and (cast('2000-08-23' as timestamp) + interval 30 days)
> and ss_store_sk = s_store_sk
> group by s_store_sk)
> ,
> sr as
> (select s_store_sk,
> sum(sr_return_amt) as return_amt,
> sum(sr_net_loss) as profit_loss
> from store_returns,
> date_dim,
> store
> where sr_returned_date_sk = d_date_sk
> and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
> and (cast('2000-08-23' as timestamp) + interval 30 days)
> and sr_store_sk = s_store_sk
> group by s_store_sk),
> cs as
> (select cs_call_center_sk,
> sum(cs_ext_sales_price) as sales,
> sum(cs_net_profit) as profit
> from catalog_sales,
> date_dim
> where cs_sold_date_sk = d_date_sk
> and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
> and (cast('2000-08-23' as timestamp) + interval 30 days)
> group by cs_call_center_sk
> ),
> cr as
> (select cr_call_center_sk,
> sum(cr_return_amount) as return_amt,
> sum(cr_net_loss) as profit_loss
> from catalog_returns,
> date_dim
> where cr_returned_date_sk = d_date_sk
> and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
> and (cast('2000-08-23' as timestamp) + interval 30 days)
> group by cr_call_center_sk
> ),
> ws as
> ( select wp_web_page_sk,
> sum(ws_ext_sales_price) as sales,
> sum(ws_net_profit) as profit
> from web_sales,
> date_dim,
> web_page
> where ws_sold_date_sk = d_date_sk
> and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
> and (cast('2000-08-23' as timestamp) + interval 30 days)
> and ws_web_page_sk = wp_web_page_sk
> group by wp_web_page_sk),
> wr as
> (select wp_web_page_sk,
> sum(wr_return_amt) as return_amt,
> sum(wr_net_loss) as profit_loss
> from web_returns,
> date_dim,
> web_page
> where wr_returned_date_sk = d_date_sk
> and cast(d_date as timestamp) between cast('2000-08-23' as timestamp)
> and (cast('2000-08-23' as timestamp) + interval 30 days)
> and wr_web_page_sk = wp_web_page_sk
> group by wp_web_page_sk)
> ,
> results as
> (select channel
> , id
> , sum(sales) as sales
> , sum(return_amt) as return_amt
> , sum(profit) as profit
> from
> (select 'store channel' as channel
> , ss.s_store_sk as id
> , sales
> , coalesce(return_amt, 0) as return_amt
> , (profit - coalesce(profit_loss,0)) as profit
> from ss left join sr
> on ss.s_store_sk = sr.s_store_sk
> union all
> select 'catalog channel' as channel
> , cs_call_center_sk as id
> , sales
> , return_amt
> , (profit - profit_loss) as profit
> from cs
> , cr
> union all
> select 'web channel' as channel
> , ws.wp_web_page_sk as id
> , sales
> , coalesce(return_amt, 0) return_amt
> , (profit - coalesce(profit_loss,0)) as profit
> from ws left join wr
> on ws.wp_web_page_sk = wr.wp_web_page_sk
> ) x
> group by channel, id )
> select *
> from (
> select channel, id, sales, return_amt, profit from results
> union
> select channel, NULL AS id, sum(sales) as sales, sum(return_amt) as
> return_amt, sum(profit) as profit from results group by channel
> union
> select NULL AS channel, NULL AS id, sum(sales) as sales, sum(return_amt) as
> return_amt, sum(profit) as profit from results
> ) foo
> order by channel, id
> limit 100;
> Coordinator: machine
> Query Options (set by configuration):
> ABORT_ON_ERROR=1,MEM_LIMIT=2490368000
> Query Options (set by configuration and planner):
> ABORT_ON_ERROR=1,MEM_LIMIT=2490368000,MT_DOP=0
> Plan:
> {noformat}
> According to the code, the reservation for the scan node is supposed to be
> computed correctly in the FE but this doesn't appear to be the case
> {noformat}
> // Check if reservation was enough to allocate at least one buffer. The
> // reservation calculation in HdfsScanNode.java should guarantee this.
> // Hitting this error indicates a misconfiguration or bug.
> int64_t min_buffer_size =
> ExecEnv::GetInstance()->disk_io_mgr()->min_buffer_size();
> if (scan_range_params_->size() > 0
> && resource_profile_.min_reservation < min_buffer_size) {
> return Status(TErrorCode::INTERNAL_ERROR,
> Substitute("HDFS scan min reservation $0 must be >= min buffer size $1",
> resource_profile_.min_reservation, min_buffer_size));
> }
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)