Repository: spark
Updated Branches:
  refs/heads/master 28d944e86 -> 16b928c54


[SPARK-9529] [SQL] improve TungstenSort on DecimalType

Generate a sort prefix for DecimalType, and fix the random decimal generator used in tests so that generated values respect the requested precision and scale.

cc JoshRosen

Author: Davies Liu <[email protected]>

Closes #7857 from davies/sort_decimal and squashes the following commits:

2433959 [Davies Liu] Merge branch 'master' of github.com:apache/spark into sort_decimal
de24253 [Davies Liu] fix style
0a54c1a [Davies Liu] sort decimal


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/16b928c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/16b928c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/16b928c5

Branch: refs/heads/master
Commit: 16b928c5436b9b500d25b49bf3670bc50ddafbf9
Parents: 28d944e
Author: Davies Liu <[email protected]>
Authored: Sat Aug 1 23:36:06 2015 -0700
Committer: Reynold Xin <[email protected]>
Committed: Sat Aug 1 23:36:06 2015 -0700

----------------------------------------------------------------------
 .../sql/catalyst/expressions/SortOrder.scala    | 13 +++++++++++++
 .../apache/spark/sql/RandomDataGenerator.scala  |  5 ++++-
 .../spark/sql/types/DataTypeTestUtils.scala     |  3 ++-
 .../spark/sql/execution/SortPrefixUtils.scala   | 20 ++++++++++----------
 .../spark/sql/execution/TungstenSortSuite.scala |  3 +--
 5 files changed, 30 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/16b928c5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index afecf88..5eb5b0d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -67,6 +67,19 @@ case class SortPrefix(child: SortOrder) extends 
UnaryExpression {
         (DoublePrefixComparator.computePrefix(Double.NegativeInfinity),
           s"$DoublePrefixCmp.computePrefix((double)$input)")
       case StringType => (0L, s"$input.getPrefix()")
+      case dt: DecimalType if dt.precision - dt.scale <= 
Decimal.MAX_LONG_DIGITS =>
+        val prefix = if (dt.precision <= Decimal.MAX_LONG_DIGITS) {
+          s"$input.toUnscaledLong()"
+        } else {
+          // reduce the scale to fit in a long
+          val p = Decimal.MAX_LONG_DIGITS
+          val s = p - (dt.precision - dt.scale)
+          s"$input.changePrecision($p, $s) ? $input.toUnscaledLong() : 
${Long.MinValue}L"
+        }
+        (Long.MinValue, prefix)
+      case dt: DecimalType =>
+        (DoublePrefixComparator.computePrefix(Double.NegativeInfinity),
+          s"$DoublePrefixCmp.computePrefix($input.toDouble())")
       case _ => (0L, "0L")
     }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/16b928c5/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 81267dc..ea1fd23 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -107,7 +107,10 @@ object RandomDataGenerator {
       case DateType => Some(() => new java.sql.Date(rand.nextInt()))
       case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong()))
       case DecimalType.Fixed(precision, scale) => Some(
-        () => BigDecimal.apply(rand.nextLong(), rand.nextInt(), new 
MathContext(precision)))
+        () => BigDecimal.apply(
+          rand.nextLong() % math.pow(10, precision).toLong,
+          scale,
+          new MathContext(precision)))
       case DoubleType => randomNumeric[Double](
         rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, 
Double.MinPositiveValue,
           Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, 
Double.NaN, 0.0))

http://git-wip-us.apache.org/repos/asf/spark/blob/16b928c5/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
index 0ee9dda..417df00 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
@@ -34,8 +34,9 @@ object DataTypeTestUtils {
    * decimal types.
    */
   val fractionalTypes: Set[FractionalType] = Set(
+    DecimalType.USER_DEFAULT,
+    DecimalType(20, 5),
     DecimalType.SYSTEM_DEFAULT,
-    DecimalType(2, 1),
     DoubleType,
     FloatType
   )

http://git-wip-us.apache.org/repos/asf/spark/blob/16b928c5/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
index 6766565..2e870ec 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
@@ -36,16 +36,16 @@ object SortPrefixUtils {
 
   def getPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
     sortOrder.dataType match {
-      case StringType if sortOrder.isAscending => PrefixComparators.STRING
-      case StringType if !sortOrder.isAscending => 
PrefixComparators.STRING_DESC
-      case BooleanType | ByteType | ShortType | IntegerType | LongType | 
DateType | TimestampType
-          if sortOrder.isAscending =>
-        PrefixComparators.LONG
-      case BooleanType | ByteType | ShortType | IntegerType | LongType | 
DateType | TimestampType
-        if !sortOrder.isAscending =>
-        PrefixComparators.LONG_DESC
-      case FloatType | DoubleType if sortOrder.isAscending => 
PrefixComparators.DOUBLE
-      case FloatType | DoubleType if !sortOrder.isAscending => 
PrefixComparators.DOUBLE_DESC
+      case StringType =>
+        if (sortOrder.isAscending) PrefixComparators.STRING else 
PrefixComparators.STRING_DESC
+      case BooleanType | ByteType | ShortType | IntegerType | LongType | 
DateType | TimestampType =>
+        if (sortOrder.isAscending) PrefixComparators.LONG else 
PrefixComparators.LONG_DESC
+      case dt: DecimalType if dt.precision - dt.scale <= 
Decimal.MAX_LONG_DIGITS =>
+        if (sortOrder.isAscending) PrefixComparators.LONG else 
PrefixComparators.LONG_DESC
+      case FloatType | DoubleType =>
+        if (sortOrder.isAscending) PrefixComparators.DOUBLE else 
PrefixComparators.DOUBLE_DESC
+      case dt: DecimalType =>
+        if (sortOrder.isAscending) PrefixComparators.DOUBLE else 
PrefixComparators.DOUBLE_DESC
       case _ => NoOpPrefixComparator
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/16b928c5/sql/core/src/test/scala/org/apache/spark/sql/execution/TungstenSortSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/TungstenSortSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/TungstenSortSuite.scala
index b3f821e..c794984 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/TungstenSortSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/TungstenSortSuite.scala
@@ -61,8 +61,7 @@ class TungstenSortSuite extends SparkPlanTest with 
BeforeAndAfterAll {
 
   // Test sorting on different data types
   for (
-    dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType)
-    if !dataType.isInstanceOf[DecimalType]; // We don't have an unsafe 
representation for decimals
+    dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType);
     nullable <- Seq(true, false);
     sortOrder <- Seq('a.asc :: Nil, 'a.desc :: Nil);
     randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to