hackerwjx111-arch opened a new issue, #8122:
URL: https://github.com/apache/incubator-gluten/issues/8122
### Backend
VL (Velox)
### Bug description
When running q5 with vanilla Spark (Java) it outputs the correct result. However, with
Gluten + Velox, it fails with the error below:
gluten/incubator-gluten/tools/gluten-it/common/src/main/resources/tpcds-queries/q5.sql
with ssr as (select s_store_id, sum(sales_price) as sales,
sum(profit) as profit, sum(return_amt) as returns,
sum(net_loss) as profit_loss from ( select ss_store_sk as store_sk,
ss_sold_date_sk as date_sk, ss_ext_sales_price as sales_price,
ss_net_profit as profit, cast(0 as decimal(7,2)) as
return_amt, cast(0 as decimal(7,2)) as net_loss from
store_sales union all select sr_store_sk as store_sk,
sr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as
sales_price, cast(0 as decimal(7,2)) as profit,
sr_return_amt as return_amt, sr_net_loss as net_loss from
store_returns ) salesreturns, date_dim, store where date_sk =
d_date_sk and d_date between cast('1998-08-04' as date)
and (cast('1998-08-04' as date) + interval '14' day) and store_sk =
s_store_sk group by
s_store_id) , csr as (select cp_catalog_page_id, sum(sales_price) as
sales, sum(profit) as profit, sum(return_amt) as returns,
sum(net_loss) as profit_loss from ( select cs_catalog_page_sk as page_sk,
cs_sold_date_sk as date_sk, cs_ext_sales_price as
sales_price, cs_net_profit as profit, cast(0 as
decimal(7,2)) as return_amt, cast(0 as decimal(7,2)) as net_loss
from catalog_sales union all select cr_catalog_page_sk as page_sk,
cr_returned_date_sk as date_sk, cast(0 as decimal(7,2)) as
sales_price, cast(0 as decimal(7,2)) as profit,
cr_return_amount as return_amt, cr_net_loss as net_loss from
catalog_returns ) salesreturns, date_dim, catalog_page where
date_sk = d_date_sk and d_date between cast('1998-08-04' as date)
and (cast('1998-08-04' as date) + interval '14' day) and
page_sk = cp_catalog_page_sk group by cp_catalog_page_id) , wsr as (select
web_site_id, sum(sales_price) as sales, sum(profit) as profit,
sum(return_amt) as returns, sum(net_loss) as profit_loss from
( select ws_web_site_sk as wsr_web_site_sk, ws_sold_date_sk as
date_sk, ws_ext_sales_price as sales_price,
ws_net_profit as profit, cast(0 as decimal(7,2)) as return_amt,
cast(0 as decimal(7,2)) as net_loss from web_sales union all
select ws_web_site_sk as wsr_web_site_sk, wr_returned_date_sk as
date_sk, cast(0 as decimal(7,2)) as sales_price, cast(0
as decimal(7,2)) as profit, wr_return_amt as return_amt,
wr_net_loss as net_loss from web_returns left outer join web_sales on
( wr_item_sk = ws_item_sk and wr_order_number = ws_order_number)
) salesreturns, date_dim, web_site where date_sk
= d_date_sk and d_date between cast('1998-08-04' as date)
and (cast('1998-08-04' as date) + interval '14' day) and
wsr_web_site_sk = web_site_sk group by web_site_id) select channel
, id , sum(sales) as sales , sum(returns) as returns ,
sum(profit) as profit from (select 'store channel' as channel ,
'store' || s_store_id as id , sales , returns , (profit
- profit_loss) as profit from ssr union all select 'catalog channel' as
channel , 'catalog_page' || cp_catalog_page_id as id , sales
, returns , (profit - profit_loss) as profit from csr union all
select 'web channel' as channel , 'web_site' || web_site_id as id
, sales , returns , (profit - profit_loss) as profit from
wsr ) x group by rollup (channel, id) order by channel ,id LIMIT
100 ;
24/11/14 23:19:00 ERROR TaskResources: Task -1 failed by error:
java.lang.IllegalStateException: Couldn't find d_date_sk#972 in
[d_date_sk#282]
at
org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:80)
at
org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:73)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:456)
at
org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:73)
at
org.apache.spark.sql.catalyst.expressions.BindReferences$.$anonfun$bindReferences$1(BoundAttribute.scala:94)
at scala.collection.immutable.List.map(List.scala:293)
at
org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReferences(BoundAttribute.scala:94)
at
org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.create(Projection.scala:161)
at
org.apache.spark.sql.execution.ColumnarBuildSideRelation$$anon$2.next(ColumnarBuildSideRelation.scala:144)
at
org.apache.spark.sql.execution.ColumnarBuildSideRelation$$anon$2.next(ColumnarBuildSideRelation.scala:111)
at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
at
scala.collection.TraversableOnce$FlattenOps$$anon$2.hasNext(TraversableOnce.scala:521)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at
scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
at
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
at scala.collection.TraversableOnce.to(TraversableOnce.scala:366)
at scala.collection.TraversableOnce.to$(TraversableOnce.scala:364)
at scala.collection.AbstractIterator.to(Iterator.scala:1431)
at
scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:358)
at
scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:358)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1431)
at
scala.collection.TraversableOnce.toArray(TraversableOnce.scala:345)
at
scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:339)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1431)
at
org.apache.spark.sql.execution.ColumnarBuildSideRelation.$anonfun$transform$1(ColumnarBuildSideRelation.scala:175)
at
org.apache.spark.task.TaskResources$.runUnsafe(TaskResources.scala:99)
at
org.apache.spark.sql.execution.ColumnarBuildSideRelation.transform(ColumnarBuildSideRelation.scala:88)
at
org.apache.spark.sql.execution.ColumnarSubqueryBroadcastExec.$anonfun$relationFuture$3(ColumnarSubqueryBroadcastExec.scala:80)
at org.apache.gluten.utils.Arm$.withResource(Arm.scala:25)
at
org.apache.gluten.metrics.GlutenTimeMetric$.millis(GlutenTimeMetric.scala:37)
at
org.apache.spark.sql.execution.ColumnarSubqueryBroadcastExec.$anonfun$relationFuture$2(ColumnarSubqueryBroadcastExec.scala:75)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withExecutionId$1(SQLExecution.scala:171)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
at
org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:169)
at
org.apache.spark.sql.execution.ColumnarSubqueryBroadcastExec.$anonfun$relationFuture$1(ColumnarSubqueryBroadcastExec.scala:73)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at
scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
### Spark version
Spark-3.5.x
### Spark configurations
GLUTEN_JAR=./gluten/incubator-gluten/package/target/gluten-velox-bundle-spark3.5_2.12-ubuntu_22.04_x86_64-1.3.0-SNAPSHOT.jar
SPARK_HOME=./spark-3.5.3-bin-hadoop3/
cat tpcds_parquet.scala | ${SPARK_HOME}/bin/spark-shell \
--master local[1] --deploy-mode client \
--conf spark.plugins=org.apache.gluten.GlutenPlugin \
--conf spark.driver.extraClassPath=${GLUTEN_JAR} \
--conf spark.executor.extraClassPath=${GLUTEN_JAR} \
--conf spark.memory.offHeap.enabled=true \
--conf spark.memory.offHeap.size=80g \
--conf spark.gluten.sql.columnar.forceShuffledHashJoin=true \
--conf
spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \
--driver-memory 20g \
--executor-memory 20g \
--conf spark.executor.memoryOverhead=20g \
--conf spark.driver.maxResultSize=20g
### System information
Genoa x86 system
### Relevant logs
_No response_
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]