Chun Chang created DRILL-5327:
---------------------------------
Summary: Hash aggregate can return empty batch which can cause
schema change exception
Key: DRILL-5327
URL: https://issues.apache.org/jira/browse/DRILL-5327
Project: Apache Drill
Issue Type: Bug
Components: Functions - Drill
Affects Versions: 1.10.0
Reporter: Chun Chang
Priority: Blocker
Hash aggregate can return empty batches which cause drill to throw schema
change exception (not handling this type of schema change). This is not a new
bug. But a recent hash function change (a theoretically correct change) may
have increased the chance of hitting this issue. I don't have scientific data
to support my claim (in fact I don't believe it's the case), but a regular
regression run used to pass fails now due to this bug. My concern is that
existing drill users out there may have queries that used to work but fail now.
It will be difficult to explain why the new release is better for them. I put
this bug as blocker so we can discuss it before releasing 1.10.
{noformat}
/root/drillAutomation/framework-master/framework/resources/Advanced/tpcds/tpcds_sf1/original/text/query66.sql
Query:
-- start query 66 in stream 0 using template query66.tpl
SELECT w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
ship_carriers,
year1,
Sum(jan_sales) AS jan_sales,
Sum(feb_sales) AS feb_sales,
Sum(mar_sales) AS mar_sales,
Sum(apr_sales) AS apr_sales,
Sum(may_sales) AS may_sales,
Sum(jun_sales) AS jun_sales,
Sum(jul_sales) AS jul_sales,
Sum(aug_sales) AS aug_sales,
Sum(sep_sales) AS sep_sales,
Sum(oct_sales) AS oct_sales,
Sum(nov_sales) AS nov_sales,
Sum(dec_sales) AS dec_sales,
Sum(jan_sales / w_warehouse_sq_ft) AS jan_sales_per_sq_foot,
Sum(feb_sales / w_warehouse_sq_ft) AS feb_sales_per_sq_foot,
Sum(mar_sales / w_warehouse_sq_ft) AS mar_sales_per_sq_foot,
Sum(apr_sales / w_warehouse_sq_ft) AS apr_sales_per_sq_foot,
Sum(may_sales / w_warehouse_sq_ft) AS may_sales_per_sq_foot,
Sum(jun_sales / w_warehouse_sq_ft) AS jun_sales_per_sq_foot,
Sum(jul_sales / w_warehouse_sq_ft) AS jul_sales_per_sq_foot,
Sum(aug_sales / w_warehouse_sq_ft) AS aug_sales_per_sq_foot,
Sum(sep_sales / w_warehouse_sq_ft) AS sep_sales_per_sq_foot,
Sum(oct_sales / w_warehouse_sq_ft) AS oct_sales_per_sq_foot,
Sum(nov_sales / w_warehouse_sq_ft) AS nov_sales_per_sq_foot,
Sum(dec_sales / w_warehouse_sq_ft) AS dec_sales_per_sq_foot,
Sum(jan_net) AS jan_net,
Sum(feb_net) AS feb_net,
Sum(mar_net) AS mar_net,
Sum(apr_net) AS apr_net,
Sum(may_net) AS may_net,
Sum(jun_net) AS jun_net,
Sum(jul_net) AS jul_net,
Sum(aug_net) AS aug_net,
Sum(sep_net) AS sep_net,
Sum(oct_net) AS oct_net,
Sum(nov_net) AS nov_net,
Sum(dec_net) AS dec_net
FROM (SELECT w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
'ZOUROS'
|| ','
|| 'ZHOU' AS ship_carriers,
d_year AS year1,
Sum(CASE
WHEN d_moy = 1 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS jan_sales,
Sum(CASE
WHEN d_moy = 2 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS feb_sales,
Sum(CASE
WHEN d_moy = 3 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS mar_sales,
Sum(CASE
WHEN d_moy = 4 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS apr_sales,
Sum(CASE
WHEN d_moy = 5 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS may_sales,
Sum(CASE
WHEN d_moy = 6 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS jun_sales,
Sum(CASE
WHEN d_moy = 7 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS jul_sales,
Sum(CASE
WHEN d_moy = 8 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS aug_sales,
Sum(CASE
WHEN d_moy = 9 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS sep_sales,
Sum(CASE
WHEN d_moy = 10 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS oct_sales,
Sum(CASE
WHEN d_moy = 11 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS nov_sales,
Sum(CASE
WHEN d_moy = 12 THEN ws_ext_sales_price * ws_quantity
ELSE 0
END) AS dec_sales,
Sum(CASE
WHEN d_moy = 1 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS jan_net,
Sum(CASE
WHEN d_moy = 2 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS feb_net,
Sum(CASE
WHEN d_moy = 3 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS mar_net,
Sum(CASE
WHEN d_moy = 4 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS apr_net,
Sum(CASE
WHEN d_moy = 5 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS may_net,
Sum(CASE
WHEN d_moy = 6 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS jun_net,
Sum(CASE
WHEN d_moy = 7 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS jul_net,
Sum(CASE
WHEN d_moy = 8 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS aug_net,
Sum(CASE
WHEN d_moy = 9 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS sep_net,
Sum(CASE
WHEN d_moy = 10 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS oct_net,
Sum(CASE
WHEN d_moy = 11 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS nov_net,
Sum(CASE
WHEN d_moy = 12 THEN ws_net_paid_inc_ship * ws_quantity
ELSE 0
END) AS dec_net
FROM web_sales,
warehouse,
date_dim,
time_dim,
ship_mode
WHERE ws_warehouse_sk = w_warehouse_sk
AND ws_sold_date_sk = d_date_sk
AND ws_sold_time_sk = t_time_sk
AND ws_ship_mode_sk = sm_ship_mode_sk
AND d_year = 1998
AND t_time BETWEEN 7249 AND 7249 + 28800
AND sm_carrier IN ( 'ZOUROS', 'ZHOU' )
GROUP BY w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
d_year
UNION ALL
SELECT w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
'ZOUROS'
|| ','
|| 'ZHOU' AS ship_carriers,
d_year AS year1,
Sum(CASE
WHEN d_moy = 1 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS jan_sales,
Sum(CASE
WHEN d_moy = 2 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS feb_sales,
Sum(CASE
WHEN d_moy = 3 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS mar_sales,
Sum(CASE
WHEN d_moy = 4 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS apr_sales,
Sum(CASE
WHEN d_moy = 5 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS may_sales,
Sum(CASE
WHEN d_moy = 6 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS jun_sales,
Sum(CASE
WHEN d_moy = 7 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS jul_sales,
Sum(CASE
WHEN d_moy = 8 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS aug_sales,
Sum(CASE
WHEN d_moy = 9 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS sep_sales,
Sum(CASE
WHEN d_moy = 10 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS oct_sales,
Sum(CASE
WHEN d_moy = 11 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS nov_sales,
Sum(CASE
WHEN d_moy = 12 THEN cs_ext_sales_price * cs_quantity
ELSE 0
END) AS dec_sales,
Sum(CASE
WHEN d_moy = 1 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS jan_net,
Sum(CASE
WHEN d_moy = 2 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS feb_net,
Sum(CASE
WHEN d_moy = 3 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS mar_net,
Sum(CASE
WHEN d_moy = 4 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS apr_net,
Sum(CASE
WHEN d_moy = 5 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS may_net,
Sum(CASE
WHEN d_moy = 6 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS jun_net,
Sum(CASE
WHEN d_moy = 7 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS jul_net,
Sum(CASE
WHEN d_moy = 8 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS aug_net,
Sum(CASE
WHEN d_moy = 9 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS sep_net,
Sum(CASE
WHEN d_moy = 10 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS oct_net,
Sum(CASE
WHEN d_moy = 11 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS nov_net,
Sum(CASE
WHEN d_moy = 12 THEN cs_net_paid * cs_quantity
ELSE 0
END) AS dec_net
FROM catalog_sales,
warehouse,
date_dim,
time_dim,
ship_mode
WHERE cs_warehouse_sk = w_warehouse_sk
AND cs_sold_date_sk = d_date_sk
AND cs_sold_time_sk = t_time_sk
AND cs_ship_mode_sk = sm_ship_mode_sk
AND d_year = 1998
AND t_time BETWEEN 7249 AND 7249 + 28800
AND sm_carrier IN ( 'ZOUROS', 'ZHOU' )
GROUP BY w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
d_year) x
GROUP BY w_warehouse_name,
w_warehouse_sq_ft,
w_city,
w_county,
w_state,
w_country,
ship_carriers,
year1
ORDER BY w_warehouse_name
LIMIT 100
Failed with exception
java.sql.SQLException: UNSUPPORTED_OPERATION ERROR: Hash aggregate does not
support schema changes
Fragment 0:0
[Error Id: 9af86933-491e-4ae4-b848-bf66cb4464f9 on atsqa6c88.qa.lab:31010]
at
org.apache.drill.jdbc.impl.DrillCursor.nextRowInternally(DrillCursor.java:489)
at org.apache.drill.jdbc.impl.DrillCursor.next(DrillCursor.java:593)
at
oadd.org.apache.calcite.avatica.AvaticaResultSet.next(AvaticaResultSet.java:215)
at
org.apache.drill.jdbc.impl.DrillResultSetImpl.next(DrillResultSetImpl.java:140)
at
org.apache.drill.test.framework.DrillTestJdbc.executeQuery(DrillTestJdbc.java:218)
at
org.apache.drill.test.framework.DrillTestJdbc.run(DrillTestJdbc.java:101)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
Caused by: oadd.org.apache.drill.common.exceptions.UserRemoteException:
UNSUPPORTED_OPERATION ERROR: Hash aggregate does not support schema changes
Fragment 0:0
[Error Id: 9af86933-491e-4ae4-b848-bf66cb4464f9 on atsqa6c88.qa.lab:31010]
at
oadd.org.apache.drill.exec.rpc.user.QueryResultHandler.resultArrived(QueryResultHandler.java:123)
at
oadd.org.apache.drill.exec.rpc.user.UserClient.handle(UserClient.java:343)
at
oadd.org.apache.drill.exec.rpc.user.UserClient.handle(UserClient.java:88)
at
oadd.org.apache.drill.exec.rpc.RpcBus$InboundHandler.decode(RpcBus.java:274)
at
oadd.org.apache.drill.exec.rpc.RpcBus$InboundHandler.decode(RpcBus.java:244)
at
oadd.io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:89)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
at
oadd.io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:254)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
at
oadd.io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
at
oadd.io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:242)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
at
oadd.io.netty.channel.ChannelInboundHandlerAdapter.channelRead(ChannelInboundHandlerAdapter.java:86)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
at
oadd.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
at
oadd.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:847)
at
oadd.io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
at
oadd.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:511)
at
oadd.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468)
at
oadd.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382)
at oadd.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354)
at
oadd.io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
... 1 more
{noformat}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)