[
https://issues.apache.org/jira/browse/SPARK-30181?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Yu-Jhe Li updated SPARK-30181:
------------------------------
Description:
SQL below will throw a runtime exception since spark-2.4.0. I think it's a bug
brought from SPARK-22384
{code:scala}
val df = Seq(
(1, java.sql.Timestamp.valueOf("2019-12-01 00:00:00"), 1),
(2, java.sql.Timestamp.valueOf("2019-12-01 01:00:00"), 1)
).toDF("id", "dt", "value")
df.write.partitionBy("dt").mode("overwrite").saveAsTable("timestamp_part")
spark.sql("select * from timestamp_part where dt >= '2019-12-01
00:00:00'").explain(true)
{code}
{noformat}
Caught Hive MetaException attempting to get partition metadata by filter from
Hive. You can set the Spark configuration setting
spark.sql.hive.manageFilesourcePartitions to false to work around this problem,
however this will result in degraded performance. Please report a bug:
https://issues.apache.org/jira/browse/SPARK
java.lang.RuntimeException: Caught Hive MetaException attempting to get
partition metadata by filter from Hive. You can set the Spark configuration
setting spark.sql.hive.manageFilesourcePartitions to false to work around this
problem, however this will result in degraded performance. Please report a bug:
https://issues.apache.org/jira/browse/SPARK
at
org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:774)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:679)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:677)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:275)
at
org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:213)
at
org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:212)
at
org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:258)
at
org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:677)
at
org.apache.spark.sql.hive.client.HiveClientSuite.testMetastorePartitionFiltering(HiveClientSuite.scala:310)
at
org.apache.spark.sql.hive.client.HiveClientSuite.org$apache$spark$sql$hive$client$HiveClientSuite$$testMetastorePartitionFiltering(HiveClientSuite.scala:282)
at
org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply$mcV$sp(HiveClientSuite.scala:105)
at
org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply(HiveClientSuite.scala:105)
at
org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply(HiveClientSuite.scala:105)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:103)
at
org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289)
at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196)
at org.scalatest.FunSuite.runTest(FunSuite.scala:1560)
at
org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
at
org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
at
org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:396)
at
org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:384)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
at
org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:379)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461)
at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229)
at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
at org.scalatest.Suite$class.run(Suite.scala:1147)
at
org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
at org.scalatest.SuperEngine.runImpl(Engine.scala:521)
at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233)
at
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:52)
at
org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:213)
at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:210)
at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:52)
at org.scalatest.Suite$class.callExecuteOnSuite$1(Suite.scala:1210)
at org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1257)
at org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1255)
at
scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at org.scalatest.Suite$class.runNestedSuites(Suite.scala:1255)
at
org.apache.spark.sql.hive.client.HiveClientSuites.runNestedSuites(HiveClientSuites.scala:24)
at org.scalatest.Suite$class.run(Suite.scala:1144)
at
org.apache.spark.sql.hive.client.HiveClientSuites.run(HiveClientSuites.scala:24)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
at
org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
at
org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1334)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334)
at
org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
at
org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
at
org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500)
at
org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
at org.scalatest.tools.Runner$.run(Runner.scala:850)
at org.scalatest.tools.Runner.run(Runner.scala)
at
org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:133)
at
org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:761)
... 66 more
Caused by: MetaException(message:Filtering is supported only on partition keys
of type string)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$FilterBuilder.setError(ExpressionTree.java:185)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.getJdoFilterPushdownParam(ExpressionTree.java:440)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilterOverPartitions(ExpressionTree.java:357)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilter(ExpressionTree.java:279)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree.generateJDOFilterFragment(ExpressionTree.java:578)
at
org.apache.hadoop.hive.metastore.ObjectStore.makeQueryFilterString(ObjectStore.java:2615)
at
org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsViaOrmFilter(ObjectStore.java:2199)
at
org.apache.hadoop.hive.metastore.ObjectStore.access$500(ObjectStore.java:160)
at
org.apache.hadoop.hive.metastore.ObjectStore$5.getJdoResult(ObjectStore.java:2530)
at
org.apache.hadoop.hive.metastore.ObjectStore$5.getJdoResult(ObjectStore.java:2515)
at
org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.run(ObjectStore.java:2391)
at
org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilterInternal(ObjectStore.java:2532)
at
org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilter(ObjectStore.java:2335)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:114)
at com.sun.proxy.$Proxy9.getPartitionsByFilter(Unknown Source)
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_partitions_by_filter(HiveMetaStore.java:4448)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
at com.sun.proxy.$Proxy11.get_partitions_by_filter(Unknown Source)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitionsByFilter(HiveMetaStoreClient.java:1105)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
at com.sun.proxy.$Proxy12.listPartitionsByFilter(Unknown Source)
at
org.apache.hadoop.hive.ql.metadata.Hive.getPartitionsByFilter(Hive.java:2254)
... 71 more
{noformat}
The reason is that Hive partition pruning is supported only on partition keys
of type string or integral types.
was:
SQL below will throw a runtime exception since spark-2.4.0. I think it's a bug
brought from SPARK-22384
{code:scala}
spark.sql("CREATE TABLE timestamp_part (value INT) PARTITIONED BY (dt
TIMESTAMP)")
val df = Seq(
(1, java.sql.Timestamp.valueOf("2019-12-01 00:00:00"), 1),
(2, java.sql.Timestamp.valueOf("2019-12-01 01:00:00"), 1)
).toDF("id", "dt", "value")
df.write.partitionBy("dt").mode("overwrite").saveAsTable("timestamp_part")
spark.sql("select * from timestamp_part where dt >= '2019-12-01
00:00:00'").explain(true)
{code}
{noformat}
Caught Hive MetaException attempting to get partition metadata by filter from
Hive. You can set the Spark configuration setting
spark.sql.hive.manageFilesourcePartitions to false to work around this problem,
however this will result in degraded performance. Please report a bug:
https://issues.apache.org/jira/browse/SPARK
java.lang.RuntimeException: Caught Hive MetaException attempting to get
partition metadata by filter from Hive. You can set the Spark configuration
setting spark.sql.hive.manageFilesourcePartitions to false to work around this
problem, however this will result in degraded performance. Please report a bug:
https://issues.apache.org/jira/browse/SPARK
at
org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:774)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:679)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:677)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:275)
at
org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:213)
at
org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:212)
at
org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:258)
at
org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:677)
at
org.apache.spark.sql.hive.client.HiveClientSuite.testMetastorePartitionFiltering(HiveClientSuite.scala:310)
at
org.apache.spark.sql.hive.client.HiveClientSuite.org$apache$spark$sql$hive$client$HiveClientSuite$$testMetastorePartitionFiltering(HiveClientSuite.scala:282)
at
org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply$mcV$sp(HiveClientSuite.scala:105)
at
org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply(HiveClientSuite.scala:105)
at
org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply(HiveClientSuite.scala:105)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:103)
at
org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289)
at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196)
at org.scalatest.FunSuite.runTest(FunSuite.scala:1560)
at
org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
at
org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
at
org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:396)
at
org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:384)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
at
org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:379)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461)
at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229)
at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
at org.scalatest.Suite$class.run(Suite.scala:1147)
at
org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
at org.scalatest.SuperEngine.runImpl(Engine.scala:521)
at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233)
at
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:52)
at
org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:213)
at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:210)
at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:52)
at org.scalatest.Suite$class.callExecuteOnSuite$1(Suite.scala:1210)
at org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1257)
at org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1255)
at
scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at org.scalatest.Suite$class.runNestedSuites(Suite.scala:1255)
at
org.apache.spark.sql.hive.client.HiveClientSuites.runNestedSuites(HiveClientSuites.scala:24)
at org.scalatest.Suite$class.run(Suite.scala:1144)
at
org.apache.spark.sql.hive.client.HiveClientSuites.run(HiveClientSuites.scala:24)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
at
org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
at
org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1334)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334)
at
org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
at
org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
at
org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500)
at
org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
at org.scalatest.tools.Runner$.run(Runner.scala:850)
at org.scalatest.tools.Runner.run(Runner.scala)
at
org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:133)
at
org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:761)
... 66 more
Caused by: MetaException(message:Filtering is supported only on partition keys
of type string)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$FilterBuilder.setError(ExpressionTree.java:185)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.getJdoFilterPushdownParam(ExpressionTree.java:440)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilterOverPartitions(ExpressionTree.java:357)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilter(ExpressionTree.java:279)
at
org.apache.hadoop.hive.metastore.parser.ExpressionTree.generateJDOFilterFragment(ExpressionTree.java:578)
at
org.apache.hadoop.hive.metastore.ObjectStore.makeQueryFilterString(ObjectStore.java:2615)
at
org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsViaOrmFilter(ObjectStore.java:2199)
at
org.apache.hadoop.hive.metastore.ObjectStore.access$500(ObjectStore.java:160)
at
org.apache.hadoop.hive.metastore.ObjectStore$5.getJdoResult(ObjectStore.java:2530)
at
org.apache.hadoop.hive.metastore.ObjectStore$5.getJdoResult(ObjectStore.java:2515)
at
org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.run(ObjectStore.java:2391)
at
org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilterInternal(ObjectStore.java:2532)
at
org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilter(ObjectStore.java:2335)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:114)
at com.sun.proxy.$Proxy9.getPartitionsByFilter(Unknown Source)
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_partitions_by_filter(HiveMetaStore.java:4448)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
at com.sun.proxy.$Proxy11.get_partitions_by_filter(Unknown Source)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitionsByFilter(HiveMetaStoreClient.java:1105)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
at com.sun.proxy.$Proxy12.listPartitionsByFilter(Unknown Source)
at
org.apache.hadoop.hive.ql.metadata.Hive.getPartitionsByFilter(Hive.java:2254)
... 71 more
{noformat}
The reason is that Hive partition pruning is supported only on partition keys
of type string or integral types.
> Throws runtime exception when filter metastore partition key that's not
> string type or integral types
> -----------------------------------------------------------------------------------------------------
>
> Key: SPARK-30181
> URL: https://issues.apache.org/jira/browse/SPARK-30181
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.4.0, 2.4.1, 2.4.2, 2.4.3, 2.4.4
> Reporter: Yu-Jhe Li
> Priority: Major
>
> SQL below will throw a runtime exception since spark-2.4.0. I think it's a
> bug brought from SPARK-22384
> {code:scala}
> val df = Seq(
> (1, java.sql.Timestamp.valueOf("2019-12-01 00:00:00"), 1),
> (2, java.sql.Timestamp.valueOf("2019-12-01 01:00:00"), 1)
> ).toDF("id", "dt", "value")
> df.write.partitionBy("dt").mode("overwrite").saveAsTable("timestamp_part")
> spark.sql("select * from timestamp_part where dt >= '2019-12-01
> 00:00:00'").explain(true)
> {code}
> {noformat}
> Caught Hive MetaException attempting to get partition metadata by filter from
> Hive. You can set the Spark configuration setting
> spark.sql.hive.manageFilesourcePartitions to false to work around this
> problem, however this will result in degraded performance. Please report a
> bug: https://issues.apache.org/jira/browse/SPARK
> java.lang.RuntimeException: Caught Hive MetaException attempting to get
> partition metadata by filter from Hive. You can set the Spark configuration
> setting spark.sql.hive.manageFilesourcePartitions to false to work around
> this problem, however this will result in degraded performance. Please report
> a bug: https://issues.apache.org/jira/browse/SPARK
> at
> org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:774)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:679)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:677)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:275)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:213)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:212)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:258)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:677)
> at
> org.apache.spark.sql.hive.client.HiveClientSuite.testMetastorePartitionFiltering(HiveClientSuite.scala:310)
> at
> org.apache.spark.sql.hive.client.HiveClientSuite.org$apache$spark$sql$hive$client$HiveClientSuite$$testMetastorePartitionFiltering(HiveClientSuite.scala:282)
> at
> org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply$mcV$sp(HiveClientSuite.scala:105)
> at
> org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply(HiveClientSuite.scala:105)
> at
> org.apache.spark.sql.hive.client.HiveClientSuite$$anonfun$1.apply(HiveClientSuite.scala:105)
> at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
> at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> at org.scalatest.Transformer.apply(Transformer.scala:22)
> at org.scalatest.Transformer.apply(Transformer.scala:20)
> at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
> at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:103)
> at
> org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183)
> at
> org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
> at
> org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
> at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289)
> at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196)
> at org.scalatest.FunSuite.runTest(FunSuite.scala:1560)
> at
> org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
> at
> org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
> at
> org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:396)
> at
> org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:384)
> at scala.collection.immutable.List.foreach(List.scala:392)
> at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
> at
> org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:379)
> at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461)
> at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229)
> at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
> at org.scalatest.Suite$class.run(Suite.scala:1147)
> at
> org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
> at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
> at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
> at org.scalatest.SuperEngine.runImpl(Engine.scala:521)
> at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233)
> at
> org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:52)
> at
> org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:213)
> at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:210)
> at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:52)
> at org.scalatest.Suite$class.callExecuteOnSuite$1(Suite.scala:1210)
> at org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1257)
> at org.scalatest.Suite$$anonfun$runNestedSuites$1.apply(Suite.scala:1255)
> at
> scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
> at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
> at org.scalatest.Suite$class.runNestedSuites(Suite.scala:1255)
> at
> org.apache.spark.sql.hive.client.HiveClientSuites.runNestedSuites(HiveClientSuites.scala:24)
> at org.scalatest.Suite$class.run(Suite.scala:1144)
> at
> org.apache.spark.sql.hive.client.HiveClientSuites.run(HiveClientSuites.scala:24)
> at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
> at
> org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
> at
> org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1334)
> at scala.collection.immutable.List.foreach(List.scala:392)
> at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334)
> at
> org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
> at
> org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
> at
> org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500)
> at
> org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
> at org.scalatest.tools.Runner$.run(Runner.scala:850)
> at org.scalatest.tools.Runner.run(Runner.scala)
> at
> org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:133)
> at
> org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)
> Caused by: java.lang.reflect.InvocationTargetException
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:761)
> ... 66 more
> Caused by: MetaException(message:Filtering is supported only on partition
> keys of type string)
> at
> org.apache.hadoop.hive.metastore.parser.ExpressionTree$FilterBuilder.setError(ExpressionTree.java:185)
> at
> org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.getJdoFilterPushdownParam(ExpressionTree.java:440)
> at
> org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilterOverPartitions(ExpressionTree.java:357)
> at
> org.apache.hadoop.hive.metastore.parser.ExpressionTree$LeafNode.generateJDOFilter(ExpressionTree.java:279)
> at
> org.apache.hadoop.hive.metastore.parser.ExpressionTree.generateJDOFilterFragment(ExpressionTree.java:578)
> at
> org.apache.hadoop.hive.metastore.ObjectStore.makeQueryFilterString(ObjectStore.java:2615)
> at
> org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsViaOrmFilter(ObjectStore.java:2199)
> at
> org.apache.hadoop.hive.metastore.ObjectStore.access$500(ObjectStore.java:160)
> at
> org.apache.hadoop.hive.metastore.ObjectStore$5.getJdoResult(ObjectStore.java:2530)
> at
> org.apache.hadoop.hive.metastore.ObjectStore$5.getJdoResult(ObjectStore.java:2515)
> at
> org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.run(ObjectStore.java:2391)
> at
> org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilterInternal(ObjectStore.java:2532)
> at
> org.apache.hadoop.hive.metastore.ObjectStore.getPartitionsByFilter(ObjectStore.java:2335)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:114)
> at com.sun.proxy.$Proxy9.getPartitionsByFilter(Unknown Source)
> at
> org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_partitions_by_filter(HiveMetaStore.java:4448)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
> at com.sun.proxy.$Proxy11.get_partitions_by_filter(Unknown Source)
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitionsByFilter(HiveMetaStoreClient.java:1105)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
> at com.sun.proxy.$Proxy12.listPartitionsByFilter(Unknown Source)
> at
> org.apache.hadoop.hive.ql.metadata.Hive.getPartitionsByFilter(Hive.java:2254)
> ... 71 more
> {noformat}
> The reason is that Hive partition pruning is supported only on partition keys
> of type string or integral types.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]