This is an automated email from the ASF dual-hosted git repository.
tdsilva pushed a commit to branch 4.x-HBase-1.2
in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/4.x-HBase-1.2 by this push:
new 9a1a494 PHOENIX-5102 Filtering on DATE types throws an exception using the spark connector
9a1a494 is described below
commit 9a1a494fe012031c00ee3c9fba35fb74e988500f
Author: Thomas D'Silva <[email protected]>
AuthorDate: Tue Jan 15 16:56:15 2019 -0800
PHOENIX-5102 Filtering on DATE types throws an exception using the spark connector
---
.../src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala | 6 +++++-
.../org/apache/phoenix/spark/FilterExpressionCompiler.scala | 9 +++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
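For context, a minimal sketch of the usage the new test exercises: reading a Phoenix table through the Spark data source and filtering on a DATE column, which previously failed when the filter was pushed down. The table name, column name, and ZooKeeper URL below are placeholders, and the "zkUrl" option key is assumed to correspond to PhoenixDataSource.ZOOKEEPER_URL.

  import org.apache.spark.sql.SparkSession

  // Placeholder Spark session and connection details; adjust to the target cluster
  val spark = SparkSession.builder().appName("phoenix-date-filter-sketch").master("local[*]").getOrCreate()

  // Load the table via the Phoenix data source (option key assumed to be "zkUrl")
  val df = spark.sqlContext.read
    .format("phoenix")
    .options(Map("table" -> "DATE_TEST", "zkUrl" -> "localhost:2181"))
    .load()

  // Filter on the DATE column COL1; with this fix the pushed-down predicate is
  // rendered as a Phoenix TO_DATE() expression instead of throwing an exception
  df.createOrReplaceTempView("DATE_TEST")
  val filtered = spark.sql("SELECT * FROM DATE_TEST WHERE COL1 > TO_DATE('1990-01-01 00:00:01', 'yyyy-MM-dd HH:mm:ss')")
  filtered.show()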
diff --git a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
index d6d0f92..b40b638 100644
--- a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
+++ b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
@@ -624,7 +624,7 @@ class PhoenixSparkIT extends AbstractPhoenixSparkIT {
varByteArray shouldEqual dataSet(0).get(3)
}
- test("Can load Phoenix DATE columns through DataFrame API") {
+ test("Can load and filter Phoenix DATE columns through DataFrame API") {
val df = spark.sqlContext.read
.format("phoenix")
.options(Map("table" -> "DATE_TEST", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
@@ -638,6 +638,10 @@ class PhoenixSparkIT extends AbstractPhoenixSparkIT {
// Note that Spark also applies the timezone offset to the returned date epoch. Rather than perform timezone
// gymnastics, just make sure we're within 24H of the epoch generated just now
assert(Math.abs(epoch - dt) < 86400000)
+
+ df.createOrReplaceTempView("DATE_TEST")
+ val df2 = spark.sql("SELECT * FROM DATE_TEST WHERE COL1 > TO_DATE('1990-01-01 00:00:01', 'yyyy-MM-dd HH:mm:ss')")
+ assert(df2.count() == 1L)
}
test("Filter operation doesn't work for column names containing a white
space (PHOENIX-2547)") {
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
index 74ff67e..1d6973c 100644
--- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
@@ -17,6 +17,7 @@
*/
package org.apache.phoenix.spark
+import java.sql.Date
import java.sql.Timestamp
import java.text.Format
@@ -26,6 +27,7 @@ import org.apache.spark.sql.sources._
class FilterExpressionCompiler() {
+ val dateformatter:Format = DateUtil.getDateFormatter(DateUtil.DEFAULT_DATE_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
val timeformatter:Format = DateUtil.getTimestampFormatter(DateUtil.DEFAULT_TIME_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
/**
@@ -102,6 +104,8 @@ class FilterExpressionCompiler() {
case timestampValue: Timestamp => getTimestampString(timestampValue)
+ case dateValue: Date => getDateString(dateValue)
+
// Borrowed from 'elasticsearch-hadoop', support these internal UTF types across Spark versions
// Spark 1.4
case utf if (isClass(utf, "org.apache.spark.sql.types.UTF8String")) => s"'${escapeStringConstant(utf.toString)}'"
@@ -117,6 +121,11 @@ class FilterExpressionCompiler() {
DateUtil.DEFAULT_TIME_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
}
+ private def getDateString(dateValue: Date): String = {
+ "TO_DATE('%s', '%s', '%s')".format(dateformatter.format(dateValue),
+ DateUtil.DEFAULT_DATE_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
+ }
+
// Helper function to escape column key to work with SQL queries
private def escapeKey(key: String): String = SchemaUtil.getEscapedFullColumnName(key)
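For illustration, a self-contained sketch of the predicate string the new Date case is expected to emit. The actual format and time-zone constants come from org.apache.phoenix.util.DateUtil; the values below are assumptions used only to show the shape of the generated TO_DATE() call.

  import java.sql.Date
  import java.text.SimpleDateFormat
  import java.util.TimeZone

  object DateFilterSketch {
    // Assumed stand-ins for DateUtil.DEFAULT_DATE_FORMAT / DateUtil.DEFAULT_TIME_ZONE_ID
    val assumedDateFormat = "yyyy-MM-dd HH:mm:ss.SSS"
    val assumedTimeZoneId = "GMT"

    def main(args: Array[String]): Unit = {
      val formatter = new SimpleDateFormat(assumedDateFormat)
      formatter.setTimeZone(TimeZone.getTimeZone(assumedTimeZoneId))
      val dateValue = Date.valueOf("1990-01-01")
      // Mirrors getDateString: format the java.sql.Date and wrap it in Phoenix's TO_DATE()
      val predicate = "TO_DATE('%s', '%s', '%s')".format(formatter.format(dateValue), assumedDateFormat, assumedTimeZoneId)
      // Prints something like: TO_DATE('1990-01-01 00:00:00.000', 'yyyy-MM-dd HH:mm:ss.SSS', 'GMT')
      println(predicate)
    }
  }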