This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cc927edf770 [SPARK-39248][SQL] Improve divide performance for decimal
type
cc927edf770 is described below
commit cc927edf770daa237993e076dc29b4793f4e2a84
Author: Yuming Wang <[email protected]>
AuthorDate: Tue May 24 21:49:20 2022 +0800
[SPARK-39248][SQL] Improve divide performance for decimal type
### What changes were proposed in this pull request?
Switch decimal type divide from
```
toJavaBigDecimal.divide(that.toJavaBigDecimal, MATH_CONTEXT)
```
to
```
toJavaBigDecimal.divide(that.toJavaBigDecimal, DecimalType.MAX_SCALE,
MATH_CONTEXT.getRoundingMode)
```
The difference is that the [`preferredScale !=
scale`](https://github.com/openjdk/jdk8u-dev/blob/jdk8u342-b01/jdk/src/share/classes/java/math/BigDecimal.java#L4288)
check evaluates to false when using the new API, so the expensive
`createAndStripZerosToMatchScale` path is skipped.
This is the stack trace when using the old API:
```
java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1203)
java.math.MutableBigInteger.divideKnuth(MutableBigInteger.java:1163)
java.math.BigInteger.divideAndRemainderKnuth(BigInteger.java:2235)
java.math.BigInteger.divideAndRemainder(BigInteger.java:2223)
java.math.BigDecimal.createAndStripZerosToMatchScale(BigDecimal.java:4404)
java.math.BigDecimal.divideAndRound(BigDecimal.java:4294)
java.math.BigDecimal.divide(BigDecimal.java:4660)
java.math.BigDecimal.divide(BigDecimal.java:1753)
...
```
### Why are the changes needed?
Improve divide performance for decimal type.
Benchmark code:
```scala
import org.apache.spark.benchmark.Benchmark
val valuesPerIteration = 2880404L
val dir = "/tmp/spark/benchmark"
spark.range(valuesPerIteration).selectExpr("CAST(id AS DECIMAL(9, 2)) AS
d").write.mode("Overwrite").parquet(dir)
val benchmark = new Benchmark("Benchmark decimal", valuesPerIteration,
minNumIters = 5)
benchmark.addCase("d * 2 > 0") { _ =>
spark.read.parquet(dir).where("d * 2 >
0").write.format("noop").mode("Overwrite").save()
}
benchmark.addCase("d / 2 > 0") { _ =>
spark.read.parquet(dir).where("d / 2 >
0").write.format("noop").mode("Overwrite").save()
}
benchmark.run()
```
Before this PR:
```
Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7
Intel(R) Core(TM) i9-9980HK CPU 2.40GHz
Benchmark decimal: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
d * 2 > 0 480 585
141 6.0 166.7 1.0X
d / 2 > 0 4689 4920
243 0.6 1627.9 0.1X
```
After this PR:
```
Java HotSpot(TM) 64-Bit Server VM 1.8.0_281-b09 on Mac OS X 10.15.7
Intel(R) Core(TM) i9-9980HK CPU 2.40GHz
Benchmark decimal: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
d * 2 > 0 529 580
35 5.4 183.6 1.0X
d / 2 > 0 811 916
80 3.6 281.4 0.7X
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit test.
Closes #36628 from wangyum/SPARK-39248.
Authored-by: Yuming Wang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../scala/org/apache/spark/sql/types/Decimal.scala | 3 +-
.../inputs/ansi/decimalArithmeticOperations.sql | 9 +++
.../inputs/decimalArithmeticOperations.sql | 9 +++
.../ansi/decimalArithmeticOperations.sql.out | 92 +++++++++++++++++++++-
.../results/decimalArithmeticOperations.sql.out | 66 +++++++++++++++-
5 files changed, 175 insertions(+), 4 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 7a43d01eb2f..43203e4f397 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -502,7 +502,8 @@ final class Decimal extends Ordered[Decimal] with
Serializable {
Decimal(toJavaBigDecimal.multiply(that.toJavaBigDecimal, MATH_CONTEXT))
def / (that: Decimal): Decimal =
- if (that.isZero) null else
Decimal(toJavaBigDecimal.divide(that.toJavaBigDecimal, MATH_CONTEXT))
+ if (that.isZero) null else
Decimal(toJavaBigDecimal.divide(that.toJavaBigDecimal,
+ DecimalType.MAX_SCALE, MATH_CONTEXT.getRoundingMode))
def % (that: Decimal): Decimal =
if (that.isZero) null
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql
b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql
index d843847e6a1..c447511ba60 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql
@@ -29,4 +29,13 @@ select 123456789123456789.1234567890 * 1.123456789123456789;
select 123456789123456789.1234567890 * 1.123456789123456789;
select 12345678912345.123456789123 / 0.000000012345678;
+select 1.0123456789012345678901234567890123456e36BD / 0.1;
+select 1.0123456789012345678901234567890123456e35BD / 1.0;
+select 1.0123456789012345678901234567890123456e34BD / 1.0;
+select 1.0123456789012345678901234567890123456e33BD / 1.0;
+select 1.0123456789012345678901234567890123456e32BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 0.1;
+select 1.0123456789012345678901234567890123456e31BD / 10.0;
+
drop table decimals_test;
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql
b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql
index a3bc282cd6a..70bb9123994 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql
@@ -83,4 +83,13 @@ select 12345678912345678912345678912.1234567 +
9999999999999999999999999999999.1
select 123456789123456789.1234567890 * 1.123456789123456789;
select 12345678912345.123456789123 / 0.000000012345678;
+select 1.0123456789012345678901234567890123456e36BD / 0.1;
+select 1.0123456789012345678901234567890123456e35BD / 1.0;
+select 1.0123456789012345678901234567890123456e34BD / 1.0;
+select 1.0123456789012345678901234567890123456e33BD / 1.0;
+select 1.0123456789012345678901234567890123456e32BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 1.0;
+select 1.0123456789012345678901234567890123456e31BD / 0.1;
+select 1.0123456789012345678901234567890123456e31BD / 10.0;
+
drop table decimals_test;
diff --git
a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out
b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out
index 9268a5f92d1..219b1e621e1 100644
---
a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 16
+-- Number of queries: 24
-- !query
@@ -112,7 +112,7 @@ select 1e35BD / 0.1
struct<>
-- !query output
org.apache.spark.SparkArithmeticException
-[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
1000000000000000000000000000000000000, 37, 0) cannot be represented as
Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass
this error.
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
1000000000000000000000000000000000000.00000000000000000000000000000000000000,
75, 38) cannot be represented as Decimal(38, 6). If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error.
== SQL(line 1, position 7) ==
select 1e35BD / 0.1
^^^^^^^^^^^^
@@ -142,6 +142,94 @@ struct<(12345678912345.123456789123 /
1.2345678E-8):decimal(38,9)>
1000000073899961059796.725866332
+-- !query
+select 1.0123456789012345678901234567890123456e36BD / 0.1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
10123456789012345678901234567890123456.00000000000000000000000000000000000000,
76, 38) cannot be represented as Decimal(38, 6). If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e36BD / 0.1
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e35BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
101234567890123456789012345678901234.56000000000000000000000000000000000000,
74, 38) cannot be represented as Decimal(38, 6). If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e35BD / 1.0
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e34BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
10123456789012345678901234567890123.45600000000000000000000000000000000000, 73,
38) cannot be represented as Decimal(38, 6). If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e34BD / 1.0
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e33BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
1012345678901234567890123456789012.34560000000000000000000000000000000000, 72,
38) cannot be represented as Decimal(38, 6). If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e33BD / 1.0
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e32BD / 1.0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
101234567890123456789012345678901.23456000000000000000000000000000000000, 71,
38) cannot be represented as Decimal(38, 6). If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e32BD / 1.0
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 1.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 1.0):decimal(38,6)>
+-- !query output
+10123456789012345678901234567890.123456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 0.1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,
101234567890123456789012345678901.23456000000000000000000000000000000000, 71,
38) cannot be represented as Decimal(38, 6). If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error.
+== SQL(line 1, position 7) ==
+select 1.0123456789012345678901234567890123456e31BD / 0.1
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 10.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 10.0):decimal(38,6)>
+-- !query output
+1012345678901234567890123456789.012346
+
+
-- !query
drop table decimals_test
-- !query schema
diff --git
a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out
b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out
index 1d92dc35010..f58950e7071 100644
---
a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 40
+-- Number of queries: 48
-- !query
@@ -327,6 +327,70 @@ struct<(12345678912345.123456789123 /
1.2345678E-8):decimal(38,18)>
NULL
+-- !query
+select 1.0123456789012345678901234567890123456e36BD / 0.1
+-- !query schema
+struct<(1012345678901234567890123456789012345.6 / 0.1):decimal(38,2)>
+-- !query output
+NULL
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e35BD / 1.0
+-- !query schema
+struct<(101234567890123456789012345678901234.56 / 1.0):decimal(38,3)>
+-- !query output
+NULL
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e34BD / 1.0
+-- !query schema
+struct<(10123456789012345678901234567890123.456 / 1.0):decimal(38,3)>
+-- !query output
+10123456789012345678901234567890123.456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e33BD / 1.0
+-- !query schema
+struct<(1012345678901234567890123456789012.3456 / 1.0):decimal(38,4)>
+-- !query output
+1012345678901234567890123456789012.3456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e32BD / 1.0
+-- !query schema
+struct<(101234567890123456789012345678901.23456 / 1.0):decimal(38,5)>
+-- !query output
+101234567890123456789012345678901.23456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 1.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 1.0):decimal(38,6)>
+-- !query output
+10123456789012345678901234567890.123456
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 0.1
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 0.1):decimal(38,6)>
+-- !query output
+NULL
+
+
+-- !query
+select 1.0123456789012345678901234567890123456e31BD / 10.0
+-- !query schema
+struct<(10123456789012345678901234567890.123456 / 10.0):decimal(38,7)>
+-- !query output
+1012345678901234567890123456789.0123456
+
+
-- !query
drop table decimals_test
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]