spark git commit: [SPARK-16228][SQL] HiveSessionCatalog should return `double`-param functions for decimal param lookups

2016-06-29 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c4cebd572 -> 011befd20


[SPARK-16228][SQL] HiveSessionCatalog should return `double`-param functions 
for decimal param lookups

## What changes were proposed in this pull request?

This PR supports a fallback lookup by casting `DecimalType` into `DoubleType` 
for external functions with `double`-type parameters.

**Reported Error Scenarios**
```scala
scala> sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
org.apache.spark.sql.AnalysisException: ... No matching method for class 
org.apache.hadoop.hive.ql.udf.UDAFPercentile with (int, decimal(38,18)). 
Possible choices: _FUNC_(bigint, array<double>)  _FUNC_(bigint, double)  ; line 
1 pos 7

scala> sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 
T(value)")
org.apache.spark.sql.AnalysisException: ... Only a float/double or float/double 
array argument is accepted as parameter 2, but decimal(38,18) was passed 
instead.; line 1 pos 7
```
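
Both failures stem from Spark typing the literal `0.5` as `decimal(38,18)` while the Hive UDFs declare only `double` overloads. The fix retries the lookup with decimal arguments cast to double. A minimal, self-contained sketch of that retry-with-cast pattern (toy `Expr`/`DataType` stand-ins for illustration, not Spark's actual catalyst classes):

```scala
import scala.util.control.NonFatal

// Toy stand-ins for catalyst's DataType/Expression, for illustration only.
sealed trait DataType
case object DoubleType extends DataType
case class DecimalType(precision: Int, scale: Int) extends DataType
case class Expr(sql: String, dataType: DataType)

// A registry that, like Hive's UDAFPercentile, accepts only double arguments.
def resolve(name: String, args: Seq[Expr]): String =
  if (args.forall(_.dataType == DoubleType)) s"$name(${args.map(_.sql).mkString(", ")})"
  else throw new IllegalArgumentException(s"No matching method for $name")

// The fallback: try the lookup as-is; on any non-fatal failure, retry with
// every decimal argument cast to double (mirroring the patch's catch block).
def lookupWithFallback(name: String, args: Seq[Expr]): String =
  try resolve(name, args)
  catch {
    case NonFatal(_) =>
      val casted = args.map {
        case Expr(sql, _: DecimalType) => Expr(s"cast($sql as double)", DoubleType)
        case other => other
      }
      resolve(name, casted)
  }

// lookupWithFallback("percentile", Seq(Expr("0.5", DecimalType(38, 18))))
// => "percentile(cast(0.5 as double))"
```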

## How was this patch tested?

Passed the Jenkins tests (including a new test case).

Author: Dongjoon Hyun 

Closes #13930 from dongjoon-hyun/SPARK-16228.

(cherry picked from commit 2eaabfa4142d4050be2b45fd277ff5c7fa430581)
Signed-off-by: Reynold Xin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/011befd2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/011befd2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/011befd2

Branch: refs/heads/branch-2.0
Commit: 011befd2098bf78979cc8e00de1576bf339583b2
Parents: c4cebd5
Author: Dongjoon Hyun 
Authored: Wed Jun 29 16:08:10 2016 -0700
Committer: Reynold Xin 
Committed: Wed Jun 29 16:08:19 2016 -0700

--
 .../apache/spark/sql/hive/HiveSessionCatalog.scala  | 16 +++-
 .../spark/sql/hive/execution/HiveUDFSuite.scala |  7 +++
 2 files changed, 22 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/011befd2/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
--
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 8a47dcf..2589b9d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -30,12 +30,13 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExpressionInfo}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DecimalType, DoubleType}
 import org.apache.spark.util.Utils
 
 
@@ -163,6 +164,19 @@ private[sql] class HiveSessionCatalog(
   }
 
   override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
+    try {
+      lookupFunction0(name, children)
+    } catch {
+      case NonFatal(_) =>
+        // SPARK-16228 ExternalCatalog may recognize `double`-type only.
+        val newChildren = children.map { child =>
+          if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
+        }
+        lookupFunction0(name, newChildren)
+    }
+  }
+
+  private def lookupFunction0(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
     // TODO: Once lookupFunction accepts a FunctionIdentifier, we should refactor this method to
     // if (super.functionExists(name)) {
     //   super.lookupFunction(name, children)

http://git-wip-us.apache.org/repos/asf/spark/blob/011befd2/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
--
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index 0f56b2c..def4601 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -142,6 +142,13 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {

spark git commit: [SPARK-16228][SQL] HiveSessionCatalog should return `double`-param functions for decimal param lookups

2016-06-29 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 23c58653f -> 2eaabfa41


[SPARK-16228][SQL] HiveSessionCatalog should return `double`-param functions 
for decimal param lookups

## What changes were proposed in this pull request?

This PR supports a fallback lookup by casting `DecimalType` into `DoubleType` 
for external functions with `double`-type parameters.

**Reported Error Scenarios**
```scala
scala> sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
org.apache.spark.sql.AnalysisException: ... No matching method for class 
org.apache.hadoop.hive.ql.udf.UDAFPercentile with (int, decimal(38,18)). 
Possible choices: _FUNC_(bigint, array<double>)  _FUNC_(bigint, double)  ; line 
1 pos 7

scala> sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 
T(value)")
org.apache.spark.sql.AnalysisException: ... Only a float/double or float/double 
array argument is accepted as parameter 2, but decimal(38,18) was passed 
instead.; line 1 pos 7
```
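
With the fallback in place, the retry casts the `decimal(38,18)` literal to `double`, so both queries resolve against the `_FUNC_(bigint, double)` overloads. Expected behavior after this patch (a spark-shell sketch, not verbatim output; the median of {1, 2, 3} at the 0.5 percentile is 2.0):

```scala
scala> sql("select percentile(value, 0.5) from values 1,2,3 T(value)").collect()
// expected: Array([2.0]) -- the decimal literal is cast to double on the retry
```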

## How was this patch tested?

Passed the Jenkins tests (including a new test case).

Author: Dongjoon Hyun 

Closes #13930 from dongjoon-hyun/SPARK-16228.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2eaabfa4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2eaabfa4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2eaabfa4

Branch: refs/heads/master
Commit: 2eaabfa4142d4050be2b45fd277ff5c7fa430581
Parents: 23c5865
Author: Dongjoon Hyun 
Authored: Wed Jun 29 16:08:10 2016 -0700
Committer: Reynold Xin 
Committed: Wed Jun 29 16:08:10 2016 -0700

--
 .../apache/spark/sql/hive/HiveSessionCatalog.scala  | 16 +++-
 .../spark/sql/hive/execution/HiveUDFSuite.scala |  7 +++
 2 files changed, 22 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2eaabfa4/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
--
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 8a47dcf..2589b9d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -30,12 +30,13 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExpressionInfo}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DecimalType, DoubleType}
 import org.apache.spark.util.Utils
 
 
@@ -163,6 +164,19 @@ private[sql] class HiveSessionCatalog(
   }
 
   override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
+    try {
+      lookupFunction0(name, children)
+    } catch {
+      case NonFatal(_) =>
+        // SPARK-16228 ExternalCatalog may recognize `double`-type only.
+        val newChildren = children.map { child =>
+          if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
+        }
+        lookupFunction0(name, newChildren)
+    }
+  }
+
+  private def lookupFunction0(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
     // TODO: Once lookupFunction accepts a FunctionIdentifier, we should refactor this method to
     // if (super.functionExists(name)) {
     //   super.lookupFunction(name, children)

http://git-wip-us.apache.org/repos/asf/spark/blob/2eaabfa4/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
--
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index 0f56b2c..def4601 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -142,6 +142,13 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
   sql("SELECT array(max(key), max(key)) FROM src").collect().toS