Repository: spark
Updated Branches:
  refs/heads/master 14978b785 -> ea02e5513


[SPARK-10859] [SQL] fix stats of StringType in columnar cache

The UTF8String may come from an UnsafeRow, in which case its underlying buffer
is not copied, so we should clone it in order to hold it in Stats.

cc yhuai

Author: Davies Liu <[email protected]>

Closes #8929 from davies/pushdown_string.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea02e551
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea02e551
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea02e551

Branch: refs/heads/master
Commit: ea02e5513a8f9853094d5612c962fc8c1a340f50
Parents: 14978b7
Author: Davies Liu <[email protected]>
Authored: Mon Sep 28 14:40:40 2015 -0700
Committer: Yin Huai <[email protected]>
Committed: Mon Sep 28 14:40:40 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/columnar/ColumnStats.scala     | 4 ++--
 .../spark/sql/columnar/InMemoryColumnarQuerySuite.scala       | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ea02e551/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala
index 5cbd52b..fbd51b7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala
@@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats {
     super.gatherStats(row, ordinal)
     if (!row.isNullAt(ordinal)) {
       val value = row.getUTF8String(ordinal)
-      if (upper == null || value.compareTo(upper) > 0) upper = value
-      if (lower == null || value.compareTo(lower) < 0) lower = value
+      if (upper == null || value.compareTo(upper) > 0) upper = value.clone()
+      if (lower == null || value.compareTo(lower) < 0) lower = value.clone()
       sizeInBytes += STRING.actualSize(row, ordinal)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/ea02e551/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
index cd3644e..ea5dd2b 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
@@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with 
SharedSQLContext {
     // Drop the cache.
     cached.unpersist()
   }
+
+  test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not 
evaluated correctly") {
+    val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s")
+    data.cache()
+    assert(data.count() === 10)
+    assert(data.filter($"s" === "3").count() === 1)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to