Repository: spark Updated Branches: refs/heads/master 14978b785 -> ea02e5513
[SPARK-10859] [SQL] fix stats of StringType in columnar cache The UTF8String may come from UnsafeRow, then underline buffer of it is not copied, so we should clone it in order to hold it in Stats. cc yhuai Author: Davies Liu <[email protected]> Closes #8929 from davies/pushdown_string. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea02e551 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea02e551 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea02e551 Branch: refs/heads/master Commit: ea02e5513a8f9853094d5612c962fc8c1a340f50 Parents: 14978b7 Author: Davies Liu <[email protected]> Authored: Mon Sep 28 14:40:40 2015 -0700 Committer: Yin Huai <[email protected]> Committed: Mon Sep 28 14:40:40 2015 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/sql/columnar/ColumnStats.scala | 4 ++-- .../spark/sql/columnar/InMemoryColumnarQuerySuite.scala | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ea02e551/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala index 5cbd52b..fbd51b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala @@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats { super.gatherStats(row, ordinal) if (!row.isNullAt(ordinal)) { val value = row.getUTF8String(ordinal) - if (upper == null || value.compareTo(upper) > 0) upper = value - if (lower == null || value.compareTo(lower) < 0) lower = value + if (upper == null || value.compareTo(upper) > 0) upper = value.clone() + if (lower == null || value.compareTo(lower) < 0) lower = value.clone() sizeInBytes += STRING.actualSize(row, ordinal) } } http://git-wip-us.apache.org/repos/asf/spark/blob/ea02e551/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala index cd3644e..ea5dd2b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala @@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { // Drop the cache. cached.unpersist() } + + test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") { + val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s") + data.cache() + assert(data.count() === 10) + assert(data.filter($"s" === "3").count() === 1) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
