[
https://issues.apache.org/jira/browse/SPARK-39173?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Yuming Wang updated SPARK-39173:
--------------------------------
Description:
How to reproduce this issue:
{code:scala}
sql(
"""
|CREATE TABLE store_sales(
| ss_item_sk INT,
| ss_ext_sales_price DECIMAL(38,4)) USING parquet
""".stripMargin)
sql(
"""
|CREATE TABLE item(
| i_item_sk INT,
| i_brand_id INT) USING parquet
""".stripMargin)
sql(
"""
|INSERT INTO store_sales VALUES
| (1, 9999999999999999999999999999999999.6012),
| (1, 9999999999999999999999999999999999.9234),
| (1, 9999999999999999999999999999999999.2856),
| (2, 6874.6012),
| (2, 2828.9223),
| (2, 6067.6034),
| (2, 6067.6034),
| (3, 999999999999999999999999999999999.2812),
| (3, 999999999999999999999999999999999.2823)
""".stripMargin)
sql(
"""
|INSERT INTO item VALUES
| (1, 7003002),
| (1, 7003002),
| (2, 10002003),
| (2, 10002003),
| (2, 10002003),
| (3, 10002004),
| (3, 10002004),
| (3, 10002004),
| (3, 10002004)
""".stripMargin)
Seq(-1, 1000000000L).foreach { broadcastThreshold =>
withSQLConf(
SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastThreshold.toString,
SQLConf.ANSI_ENABLED.key -> "true") {
val df = sql(
"""
|SELECT
| item.i_brand_id brand_id,
| avg(ss_ext_sales_price) avg_agg
|FROM store_sales, item
|WHERE store_sales.ss_item_sk = item.i_item_sk
|GROUP BY item.i_brand_id
""".stripMargin)
val error = intercept[SparkException] {
df.collect()
}
println("Error message: " + error.getMessage)
}
}
{code}
{noformat}
Error message: org.apache.spark.SparkArithmeticException:
[CANNOT_CHANGE_DECIMAL_PRECISION]
Decimal(expanded,999999999999999999999999999999999.28175,38,5}) cannot be
represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to
false to bypass this error.
Error message: org.apache.spark.SparkArithmeticException: [ARITHMETIC_OVERFLOW]
Overflow in sum of decimals. If necessary set spark.sql.ansi.enabled to false
(except for ANSI interval type) to bypass this error.
{noformat}
was:
How to reproduce this issue:
{code:scala}
Seq(-1, 1000000000L).foreach { broadcastThreshold =>
withSQLConf(
SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastThreshold.toString,
SQLConf.ANSI_ENABLED.key -> "true") {
val df = sql(
"""
|SELECT
| item.i_brand_id brand_id,
| avg(ss_ext_sales_price) avg_agg
|FROM store_sales, item
|WHERE store_sales.ss_item_sk = item.i_item_sk
|GROUP BY item.i_brand_id
""".stripMargin)
val error = intercept[SparkException] {
df.collect()
}
println("Error message: " + error.getMessage)
}
}
{code}
{noformat}
Error message: org.apache.spark.SparkArithmeticException:
[CANNOT_CHANGE_DECIMAL_PRECISION]
Decimal(expanded,999999999999999999999999999999999.28175,38,5}) cannot be
represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to
false to bypass this error.
Error message: org.apache.spark.SparkArithmeticException: [ARITHMETIC_OVERFLOW]
Overflow in sum of decimals. If necessary set spark.sql.ansi.enabled to false
(except for ANSI interval type) to bypass this error.
{noformat}
> The error message is different if broadcast join is disabled
> ------------------------------------------------------------
>
> Key: SPARK-39173
> URL: https://issues.apache.org/jira/browse/SPARK-39173
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.4.0
> Reporter: Yuming Wang
> Priority: Major
>
> How to reproduce this issue:
> {code:scala}
> sql(
> """
> |CREATE TABLE store_sales(
> | ss_item_sk INT,
> | ss_ext_sales_price DECIMAL(38,4)) USING parquet
> """.stripMargin)
> sql(
> """
> |CREATE TABLE item(
> | i_item_sk INT,
> | i_brand_id INT) USING parquet
> """.stripMargin)
> sql(
> """
> |INSERT INTO store_sales VALUES
> | (1, 9999999999999999999999999999999999.6012),
> | (1, 9999999999999999999999999999999999.9234),
> | (1, 9999999999999999999999999999999999.2856),
> | (2, 6874.6012),
> | (2, 2828.9223),
> | (2, 6067.6034),
> | (2, 6067.6034),
> | (3, 999999999999999999999999999999999.2812),
> | (3, 999999999999999999999999999999999.2823)
> """.stripMargin)
> sql(
> """
> |INSERT INTO item VALUES
> | (1, 7003002),
> | (1, 7003002),
> | (2, 10002003),
> | (2, 10002003),
> | (2, 10002003),
> | (3, 10002004),
> | (3, 10002004),
> | (3, 10002004),
> | (3, 10002004)
> """.stripMargin)
> Seq(-1, 1000000000L).foreach { broadcastThreshold =>
> withSQLConf(
> SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastThreshold.toString,
> SQLConf.ANSI_ENABLED.key -> "true") {
> val df = sql(
> """
> |SELECT
> | item.i_brand_id brand_id,
> | avg(ss_ext_sales_price) avg_agg
> |FROM store_sales, item
> |WHERE store_sales.ss_item_sk = item.i_item_sk
> |GROUP BY item.i_brand_id
> """.stripMargin)
> val error = intercept[SparkException] {
> df.collect()
> }
> println("Error message: " + error.getMessage)
> }
> }
> {code}
> {noformat}
> Error message: org.apache.spark.SparkArithmeticException:
> [CANNOT_CHANGE_DECIMAL_PRECISION]
> Decimal(expanded,999999999999999999999999999999999.28175,38,5}) cannot be
> represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to
> false to bypass this error.
> Error message: org.apache.spark.SparkArithmeticException:
> [ARITHMETIC_OVERFLOW] Overflow in sum of decimals. If necessary set
> spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass
> this error.
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]