Repository: spark
Updated Branches:
  refs/heads/branch-2.3 2381d60a2 -> 26d893a4f


[SPARK-25454][SQL] add a new config for picking minimum precision for integral literals

## What changes were proposed in this pull request?

https://github.com/apache/spark/pull/20023 proposed to allow precision loss 
during decimal operations, to reduce the possibility of overflow. This is a 
behavior change and is protected by the DECIMAL_OPERATIONS_ALLOW_PREC_LOSS 
config. However, that PR introduced another behavior change that is not 
protected by a config: picking a minimum precision for integral literals. This 
PR adds a new config for it: `spark.sql.legacy.literal.pickMinimumPrecision`.

This allows users to work around the issue in SPARK-25454, which is caused by a 
long-standing bug in the handling of decimals with negative scale.
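
For illustration, a minimal workaround sketch (assuming a running SparkSession 
named `spark`; the expected value comes from the new test below):

    // Restore the pre-#20023 literal behavior for queries hit by SPARK-25454.
    spark.conf.set("spark.sql.legacy.literal.pickMinimumPrecision", "false")
    spark.sql("select 26393499451 / (1e6 * 1000)").show()
    // expected single-row result: 26.3934994510000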

## How was this patch tested?

A new test in `SQLQuerySuite`.

Closes #22494 from cloud-fan/decimal.

Authored-by: Wenchen Fan <wenc...@databricks.com>
Signed-off-by: gatorsmile <gatorsm...@gmail.com>
(cherry picked from commit d0990e3dfee752a6460a6360e1a773138364d774)
Signed-off-by: gatorsmile <gatorsm...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26d893a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26d893a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26d893a4

Branch: refs/heads/branch-2.3
Commit: 26d893a4f64de18222942568f7735114447a6ab7
Parents: 2381d60
Author: Wenchen Fan <wenc...@databricks.com>
Authored: Wed Sep 26 17:47:05 2018 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Wed Sep 26 17:47:47 2018 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/DecimalPrecision.scala   | 10 ++++++----
 .../scala/org/apache/spark/sql/internal/SQLConf.scala    | 11 +++++++++++
 .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala  |  7 +++++++
 3 files changed, 24 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/26d893a4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
index e6f4d97..a8130ab 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
@@ -290,11 +290,13 @@ object DecimalPrecision extends TypeCoercionRule {
         // potentially losing 11 digits of the fractional part. Using only the precision needed
         // by the Literal, instead, the result would be DECIMAL(38 + 1 + 1, 18), which would
         // become DECIMAL(38, 16), safely having a much lower precision loss.
-        case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType]
-          && l.dataType.isInstanceOf[IntegralType] =>
+        case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] &&
+            l.dataType.isInstanceOf[IntegralType] &&
+            SQLConf.get.literalPickMinimumPrecision =>
           b.makeCopy(Array(Cast(l, DecimalType.fromLiteral(l)), r))
-        case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType]
-          && r.dataType.isInstanceOf[IntegralType] =>
+        case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType] &&
+            r.dataType.isInstanceOf[IntegralType] &&
+            SQLConf.get.literalPickMinimumPrecision =>
           b.makeCopy(Array(l, Cast(r, DecimalType.fromLiteral(r))))
         // Promote integers inside a binary expression with fixed-precision decimals to decimals,
         // and fixed-precision decimals in an expression with floats / doubles to doubles
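
A hedged sketch of the "minimum precision" idea the rule above relies on (an 
illustration, not the actual body of DecimalType.fromLiteral): an integral 
literal only needs as many digits as its value, instead of the worst case for 
its type.

    import org.apache.spark.sql.types.DecimalType

    // Illustration only: the minimal decimal type for a Long literal.
    def minimalTypeFor(v: Long): DecimalType = {
      val d = BigDecimal(v)
      DecimalType(d.precision, 0)  // integral literals carry scale 0
    }

    // minimalTypeFor(123L) == DecimalType(3, 0), whereas the default mapping
    // for a LongType operand is DecimalType(20, 0), which inflates the result
    // precision of the surrounding binary operation.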

http://git-wip-us.apache.org/repos/asf/spark/blob/26d893a4/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 74e0f60..0bcc5d0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1103,6 +1103,15 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
+  val LITERAL_PICK_MINIMUM_PRECISION =
+    buildConf("spark.sql.legacy.literal.pickMinimumPrecision")
+      .internal()
+      .doc("When integral literal is used in decimal operations, pick a 
minimum precision " +
+        "required by the literal if this config is true, to make the resulting 
precision and/or " +
+        "scale smaller. This can reduce the possibility of precision lose 
and/or overflow.")
+      .booleanConf
+      .createWithDefault(true)
+
   val SQL_OPTIONS_REDACTION_PATTERN =
     buildConf("spark.sql.redaction.options.regex")
       .doc("Regex to decide which keys in a Spark SQL command's options map 
contain sensitive " +
@@ -1519,6 +1528,8 @@ class SQLConf extends Serializable with Logging {
 
   def decimalOperationsAllowPrecisionLoss: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_PREC_LOSS)
 
+  def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION)
+
   def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE)
 
   def continuousStreamingExecutorPollIntervalMs: Long =
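
For context, a minimal usage sketch of the new entry (assuming a running 
SparkSession named `spark`): since it is an internal boolean conf, it can still 
be toggled per session, and Catalyst rules read it thread-locally through 
SQLConf.get, as in the DecimalPrecision change above.

    // Toggle the flag for the current session only.
    spark.conf.set("spark.sql.legacy.literal.pickMinimumPrecision", "false")

    // The value the rule will observe for queries of this session:
    val picks: Boolean = org.apache.spark.sql.internal.SQLConf.get.literalPickMinimumPrecision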

http://git-wip-us.apache.org/repos/asf/spark/blob/26d893a4/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 2ff47f9..0af6d87 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2824,6 +2824,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     val result = ds.flatMap(_.bar).distinct
     result.rdd.isEmpty
   }
+
+  test("SPARK-25454: decimal division with negative scale") {
+    // TODO: completely fix this issue even when LITERAL_PICK_MINIMUM_PRECISION is true.
+    withSQLConf(SQLConf.LITERAL_PICK_MINIMUM_PRECISION.key -> "false") {
+      checkAnswer(sql("select 26393499451 / (1e6 * 1000)"), 
Row(BigDecimal("26.3934994510000")))
+    }
+  }
 }
 
 case class Foo(bar: Option[String])
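
As an aside, a hedged plain-Scala illustration of the negative-scale corner the 
test exercises: scientific-notation literals carry a negative scale in 
java.math terms, which is the long-standing representation issue behind 
SPARK-25454.

    val e6 = BigDecimal("1e6")        // unscaled value 1, scale -6
    val prod = e6 * BigDecimal(1000)  // 1.000E+9, the scale stays negative
    // Spark's DecimalType does not model negative scales consistently, so the
    // division in the test can go wrong once the literal 26393499451 is
    // narrowed to its minimal precision.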

