spark git commit: [SPARK-17827][SQL] maxColLength type should be Int for String and Binary

hvanhovell Thu, 13 Oct 2016 11:27:01 -0700

Repository: spark
Updated Branches:
  refs/heads/master 04d417a7c -> 84f149e41



[SPARK-17827][SQL] maxColLength type should be Int for String and Binary

## What changes were proposed in this pull request?
Correct the expected type of the value returned by the Length function to be Int.

## How was this patch tested?
Ran the tests on both little-endian and big-endian platforms.

Author: Pete Robbins <robbin...@gmail.com>

Closes #15464 from robbinspg/SPARK-17827.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/84f149e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/84f149e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/84f149e4

Branch: refs/heads/master
Commit: 84f149e414475c2e60863898992001c21cfc13b2
Parents: 04d417a
Author: Pete Robbins <robbin...@gmail.com>
Authored: Thu Oct 13 11:26:30 2016 -0700
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Thu Oct 13 11:26:30 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/plans/logical/Statistics.scala | 4 ++--
 .../scala/org/apache/spark/sql/StatisticsColumnSuite.scala   | 8 ++++----
 .../scala/org/apache/spark/sql/hive/StatisticsSuite.scala    | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/84f149e4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala
index 43455c9..f3e2147 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala
@@ -98,7 +98,7 @@ case class StringColumnStat(statRow: InternalRow) {
   // The indices here must be consistent with `ColumnStatStruct.stringColumnStat`.
   val numNulls: Long = statRow.getLong(0)
   val avgColLen: Double = statRow.getDouble(1)
-  val maxColLen: Long = statRow.getLong(2)
+  val maxColLen: Long = statRow.getInt(2)
   val ndv: Long = statRow.getLong(3)
 }
 
@@ -106,7 +106,7 @@ case class BinaryColumnStat(statRow: InternalRow) {
   // The indices here must be consistent with `ColumnStatStruct.binaryColumnStat`.
   val numNulls: Long = statRow.getLong(0)
   val avgColLen: Double = statRow.getDouble(1)
-  val maxColLen: Long = statRow.getLong(2)
+  val maxColLen: Long = statRow.getInt(2)
 }
 
 case class BooleanColumnStat(statRow: InternalRow) {

http://git-wip-us.apache.org/repos/asf/spark/blob/84f149e4/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala
index 0ee0547..f1a201a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala
@@ -150,7 +150,7 @@ class StatisticsColumnSuite extends StatisticsTest {
       val colStat = ColumnStat(InternalRow(
         values.count(_.isEmpty).toLong,
         nonNullValues.map(_.length).sum / nonNullValues.length.toDouble,
-        nonNullValues.map(_.length).max.toLong,
+        nonNullValues.map(_.length).max.toInt,
         nonNullValues.distinct.length.toLong))
       (f, colStat)
     }
@@ -165,7 +165,7 @@ class StatisticsColumnSuite extends StatisticsTest {
       val colStat = ColumnStat(InternalRow(
         values.count(_.isEmpty).toLong,
         nonNullValues.map(_.length).sum / nonNullValues.length.toDouble,
-        nonNullValues.map(_.length).max.toLong))
+        nonNullValues.map(_.length).max.toInt))
       (f, colStat)
     }
     checkColStats(df, expectedColStatsSeq)
@@ -255,10 +255,10 @@ class StatisticsColumnSuite extends StatisticsTest {
               doubleSeq.distinct.length.toLong))
         case StringType =>
           ColumnStat(InternalRow(0L, stringSeq.map(_.length).sum / stringSeq.length.toDouble,
-                stringSeq.map(_.length).max.toLong, stringSeq.distinct.length.toLong))
+                stringSeq.map(_.length).max.toInt, stringSeq.distinct.length.toLong))
         case BinaryType =>
           ColumnStat(InternalRow(0L, binarySeq.map(_.length).sum / binarySeq.length.toDouble,
-                binarySeq.map(_.length).max.toLong))
+                binarySeq.map(_.length).max.toInt))
         case BooleanType =>
           ColumnStat(InternalRow(0L, booleanSeq.count(_.equals(true)).toLong,
               booleanSeq.count(_.equals(false)).toLong))

http://git-wip-us.apache.org/repos/asf/spark/blob/84f149e4/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 99dd080..85228bb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -378,7 +378,7 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
             ColumnStat(InternalRow(0L, intSeq.max, intSeq.min, intSeq.distinct.length.toLong))
           case StringType =>
             ColumnStat(InternalRow(0L, stringSeq.map(_.length).sum / stringSeq.length.toDouble,
-              stringSeq.map(_.length).max.toLong, stringSeq.distinct.length.toLong))
+              stringSeq.map(_.length).max.toInt, stringSeq.distinct.length.toLong))
           case BooleanType =>
             ColumnStat(InternalRow(0L, booleanSeq.count(_.equals(true)).toLong,
               booleanSeq.count(_.equals(false)).toLong))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-17827][SQL] maxColLength type should be Int for String and Binary

Reply via email to