Repository: spark
Updated Branches:
refs/heads/master 23c58653f -> 2eaabfa41
[SPARK-16228][SQL] HiveSessionCatalog should return `double`-param functions
for decimal param lookups
## What changes were proposed in this pull request?
This PR adds a fallback lookup that casts `DecimalType` arguments to `DoubleType`
for external functions that take `double`-type parameters.
**Reported Error Scenarios**
```scala
scala> sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
org.apache.spark.sql.AnalysisException: ... No matching method for class
org.apache.hadoop.hive.ql.udf.UDAFPercentile with (int, decimal(38,18)).
Possible choices: _FUNC_(bigint, array<double>) _FUNC_(bigint, double) ; line
1 pos 7
scala> sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0
T(value)")
org.apache.spark.sql.AnalysisException: ... Only a float/double or float/double
array argument is accepted as parameter 2, but decimal(38,18) was passed
instead.; line 1 pos 7
```
## How was this patch tested?
Pass the Jenkins tests (including a new testcase).
Author: Dongjoon Hyun <[email protected]>
Closes #13930 from dongjoon-hyun/SPARK-16228.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2eaabfa4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2eaabfa4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2eaabfa4
Branch: refs/heads/master
Commit: 2eaabfa4142d4050be2b45fd277ff5c7fa430581
Parents: 23c5865
Author: Dongjoon Hyun <[email protected]>
Authored: Wed Jun 29 16:08:10 2016 -0700
Committer: Reynold Xin <[email protected]>
Committed: Wed Jun 29 16:08:10 2016 -0700
----------------------------------------------------------------------
.../apache/spark/sql/hive/HiveSessionCatalog.scala | 16 +++++++++++++++-
.../spark/sql/hive/execution/HiveUDFSuite.scala | 7 +++++++
2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2eaabfa4/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
----------------------------------------------------------------------
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 8a47dcf..2589b9d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -30,12 +30,13 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier,
TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader,
SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Expression,
ExpressionInfo}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
import org.apache.spark.sql.hive.client.HiveClient
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DecimalType, DoubleType}
import org.apache.spark.util.Utils
@@ -163,6 +164,19 @@ private[sql] class HiveSessionCatalog(
}
override def lookupFunction(name: FunctionIdentifier, children:
Seq[Expression]): Expression = {
+ try {
+ lookupFunction0(name, children)
+ } catch {
+ case NonFatal(_) =>
+ // SPARK-16228 ExternalCatalog may recognize `double`-type only.
+ val newChildren = children.map { child =>
+ if (child.dataType.isInstanceOf[DecimalType]) Cast(child,
DoubleType) else child
+ }
+ lookupFunction0(name, newChildren)
+ }
+ }
+
+ private def lookupFunction0(name: FunctionIdentifier, children:
Seq[Expression]): Expression = {
// TODO: Once lookupFunction accepts a FunctionIdentifier, we should
refactor this method to
// if (super.functionExists(name)) {
// super.lookupFunction(name, children)
http://git-wip-us.apache.org/repos/asf/spark/blob/2eaabfa4/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index 0f56b2c..def4601 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -142,6 +142,13 @@ class HiveUDFSuite extends QueryTest with
TestHiveSingleton with SQLTestUtils {
sql("SELECT array(max(key), max(key)) FROM src").collect().toSeq)
}
+ test("SPARK-16228 Percentile needs explicit cast to double") {
+ sql("select percentile(value, cast(0.5 as double)) from values 1,2,3
T(value)")
+ sql("select percentile_approx(value, cast(0.5 as double)) from values
1.0,2.0,3.0 T(value)")
+ sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
+ sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0
T(value)")
+ }
+
test("Generic UDAF aggregates") {
checkAnswer(sql("SELECT ceiling(percentile_approx(key, 0.99999D)) FROM src
LIMIT 1"),
sql("SELECT max(key) FROM src LIMIT 1").collect().toSeq)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]