This is an automated email from the ASF dual-hosted git repository. lixiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f3ddd6f [SPARK-27402][SQL][TEST-HADOOP3.2][TEST-MAVEN] Fix hadoop-3.2 test issue(except the hive-thriftserver module) f3ddd6f is described below commit f3ddd6f9da27925607c06e55cdfb9a809633238b Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Mon May 13 10:35:26 2019 -0700 [SPARK-27402][SQL][TEST-HADOOP3.2][TEST-MAVEN] Fix hadoop-3.2 test issue(except the hive-thriftserver module) ## What changes were proposed in this pull request? This pr fix hadoop-3.2 test issues(except the `hive-thriftserver` module): 1. Add `hive.metastore.schema.verification` and `datanucleus.schema.autoCreateAll` to HiveConf. 2. hadoop-3.2 support access the Hive metastore from 0.12 to 2.2 After [SPARK-27176](https://issues.apache.org/jira/browse/SPARK-27176) and this PR, we upgraded the built-in Hive to 2.3 when enabling the Hadoop 3.2+ profile. This upgrade fixes the following issues: - [HIVE-6727](https://issues.apache.org/jira/browse/HIVE-6727): Table level stats for external tables are set incorrectly. - [HIVE-15653](https://issues.apache.org/jira/browse/HIVE-15653): Some ALTER TABLE commands drop table stats. - [SPARK-12014](https://issues.apache.org/jira/browse/SPARK-12014): Spark SQL query containing semicolon is broken in Beeline. - [SPARK-25193](https://issues.apache.org/jira/browse/SPARK-25193): insert overwrite doesn't throw exception when drop old data fails. - [SPARK-25919](https://issues.apache.org/jira/browse/SPARK-25919): Date value corrupts when tables are "ParquetHiveSerDe" formatted and target table is Partitioned. - [SPARK-26332](https://issues.apache.org/jira/browse/SPARK-26332): Spark sql write orc table on viewFS throws exception. - [SPARK-26437](https://issues.apache.org/jira/browse/SPARK-26437): Decimal data becomes bigint to query, unable to query. ## How was this patch tested? 
This PR tests Spark’s Hadoop 3.2 profile on Jenkins, and #24591 tests Spark’s Hadoop 2.7 profile on Jenkins. This PR closes #24591.
# +from __future__ import print_function from functools import total_ordering import itertools import re +import os + +if os.environ.get("AMPLAB_JENKINS"): + hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7") +else: + hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7") +print("[info] Choosing supported modules with Hadoop profile", hadoop_version) all_modules = [] @@ -72,7 +80,11 @@ class Module(object): self.dependent_modules = set() for dep in dependencies: dep.dependent_modules.add(self) - all_modules.append(self) + # TODO: Skip hive-thriftserver module for hadoop-3.2. remove this once hadoop-3.2 support it + if name == "hive-thriftserver" and hadoop_version == "hadoop3.2": + print("[info] Skip unsupported module:", name) + else: + all_modules.append(self) def contains_file(self, filename): return any(re.match(p, filename) for p in self.source_file_prefixes) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 001e4ac..6a7849d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1371,7 +1371,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { // if (isUsingHiveMetastore) { // assert(storageFormat.properties.get("path") === expected) // } - assert(storageFormat.locationUri === Some(expected)) + assert(storageFormat.locationUri.map(_.getPath) === Some(expected.getPath)) } // set table location sql("ALTER TABLE dbx.tab1 SET LOCATION '/path/to/your/lovely/heart'") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 4891010..8f9cc62 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -118,7 +118,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } - protected def testSelectiveDictionaryEncoding(isSelective: Boolean) { + protected def testSelectiveDictionaryEncoding(isSelective: Boolean, isHive23: Boolean = false) { val tableName = "orcTable" withTempDir { dir => @@ -171,7 +171,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { // Hive 0.11 and RLE v2 is introduced in Hive 0.12 ORC with more improvements. // For more details, see https://orc.apache.org/specification/ assert(stripe.getColumns(1).getKind === DICTIONARY_V2) - if (isSelective) { + if (isSelective || isHive23) { assert(stripe.getColumns(2).getKind === DIRECT_V2) } else { assert(stripe.getColumns(2).getKind === DICTIONARY_V2) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 92a1120..bad2110 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -52,6 +52,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.client.HiveClientImpl._ import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} @@ -191,7 +192,29 @@ private[hive] class HiveClientImpl( } /** Returns the configuration for the current session. 
*/ - def conf: HiveConf = state.getConf + def conf: HiveConf = if (!HiveUtils.isHive23) { + state.getConf + } else { + val hiveConf = state.getConf + // Hive changed the default of datanucleus.schema.autoCreateAll from true to false + // and hive.metastore.schema.verification from false to true since Hive 2.0. + // For details, see the JIRA HIVE-6113, HIVE-12463 and HIVE-1841. + // isEmbeddedMetaStore should not be true in the production environment. + // We hard-code hive.metastore.schema.verification and datanucleus.schema.autoCreateAll to allow + // bin/spark-shell, bin/spark-sql and sbin/start-thriftserver.sh to automatically create the + // Derby Metastore when running Spark in the non-production environment. + val isEmbeddedMetaStore = { + val msUri = hiveConf.getVar(ConfVars.METASTOREURIS) + val msConnUrl = hiveConf.getVar(ConfVars.METASTORECONNECTURLKEY) + (msUri == null || msUri.trim().isEmpty) && + (msConnUrl != null && msConnUrl.startsWith("jdbc:derby")) + } + if (isEmbeddedMetaStore) { + hiveConf.setBoolean("hive.metastore.schema.verification", false) + hiveConf.setBoolean("datanucleus.schema.autoCreateAll", true) + } + hiveConf + } private val userName = conf.getUser diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 98999eb..4e779da 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -28,6 +28,7 @@ import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hadoop.hive.shims.ShimLoader import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkSubmitUtils @@ -196,6 +197,7 @@ private[hive] class 
IsolatedClientLoader( protected def isBarrierClass(name: String): Boolean = name.startsWith(classOf[HiveClientImpl].getName) || name.startsWith(classOf[Shim].getName) || + name.startsWith(classOf[ShimLoader].getName) || barrierPrefixes.exists(name.startsWith) protected def classToPath(name: String): String = diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index e8a749f..4db6e43 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -63,6 +63,9 @@ object TestHive // SPARK-8910 .set(UI_ENABLED, false) .set(config.UNSAFE_EXCEPTION_ON_MEMORY_LEAK, true) + // Hive changed the default of hive.metastore.disallow.incompatible.col.type.changes + // from false to true. For details, see the JIRA HIVE-12320 and HIVE-17764. + .set("spark.hadoop.hive.metastore.disallow.incompatible.col.type.changes", "false") // Disable ConvertToLocalRelation for better test coverage. Test cases built on // LocalRelation will exercise the optimization rules better by disabling it as // this rule may potentially block testing of other optimization rules such as @@ -120,8 +123,10 @@ class TestHiveContext( @transient override val sparkSession: TestHiveSparkSession) extends SQLContext(sparkSession) { - val HIVE_CONTRIB_JAR: String = "hive-contrib-0.13.1.jar" - val HIVE_HCATALOG_CORE_JAR: String = "hive-hcatalog-core-0.13.1.jar" + val HIVE_CONTRIB_JAR: String = + if (HiveUtils.isHive23) "hive-contrib-2.3.4.jar" else "hive-contrib-0.13.1.jar" + val HIVE_HCATALOG_CORE_JAR: String = + if (HiveUtils.isHive23) "hive-hcatalog-core-2.3.4.jar" else "hive-hcatalog-core-0.13.1.jar" /** * If loadTestTables is false, no test tables are loaded. 
Note that this flag can only be true diff --git a/sql/hive/src/test/resources/hive-contrib-2.3.4.jar b/sql/hive/src/test/resources/hive-contrib-2.3.4.jar new file mode 100644 index 0000000..01c5a24 Binary files /dev/null and b/sql/hive/src/test/resources/hive-contrib-2.3.4.jar differ diff --git a/sql/hive/src/test/resources/hive-hcatalog-core-2.3.4.jar b/sql/hive/src/test/resources/hive-hcatalog-core-2.3.4.jar new file mode 100644 index 0000000..d67afda Binary files /dev/null and b/sql/hive/src/test/resources/hive-hcatalog-core-2.3.4.jar differ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala index f262ef6..a696d6a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala @@ -58,11 +58,19 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } test("shaded Protobuf") { - assertLoads("org.apache.hive.com.google.protobuf.ServiceException") + if (HiveUtils.isHive23) { + assertLoads("com.google.protobuf.ServiceException") + } else { + assertLoads("org.apache.hive.com.google.protobuf.ServiceException") + } } test("shaded Kryo") { - assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo") + if (HiveUtils.isHive23) { + assertLoads("com.esotericsoftware.kryo.Kryo") + } else { + assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo") + } } test("hive-common") { @@ -81,7 +89,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } test("parquet-hadoop-bundle") { - assertLoads("parquet.hadoop.ParquetOutputFormat") - assertLoads("parquet.hadoop.ParquetInputFormat") + if (HiveUtils.isHive23) { + assertLoads("org.apache.parquet.hadoop.ParquetOutputFormat") + assertLoads("org.apache.parquet.hadoop.ParquetInputFormat") + } else { + assertLoads("parquet.hadoop.ParquetOutputFormat") + 
assertLoads("parquet.hadoop.ParquetInputFormat") + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index f4c314c..4351dc7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -186,6 +186,8 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", + "--conf", "spark.sql.hive.metastore.version=1.2.1", + "--conf", "spark.sql.hive.metastore.jars=maven", "--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -203,6 +205,8 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", + "--conf", "spark.sql.hive.metastore.version=1.2.1", + "--conf", "spark.sql.hive.metastore.jars=maven", "--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", unusedJar.toString) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 5c9261c..deb0a10 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -206,7 +206,13 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(columns.map(_.dataType) === Seq(DecimalType(10, 3), StringType)) checkAnswer(table("t"), 
testDF) - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === Seq("1.1\t1", "2.1\t2")) + if (HiveUtils.isHive23) { + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.100\t1", "2.100\t2")) + } else { + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.1\t1", "2.1\t2")) + } } } @@ -238,8 +244,13 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(columns.map(_.dataType) === Seq(DecimalType(10, 3), StringType)) checkAnswer(table("t"), testDF) - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.1\t1", "2.1\t2")) + if (HiveUtils.isHive23) { + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.100\t1", "2.100\t2")) + } else { + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.1\t1", "2.1\t2")) + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala index a716f73..14d07cd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -35,10 +35,18 @@ class HiveShimSuite extends SparkFunSuite { // test when READ_COLUMN_NAMES_CONF_STR is empty HiveShim.appendReadColumns(conf, ids, names) - assert(names.asJava === ColumnProjectionUtils.getReadColumnNames(conf)) + if (HiveUtils.isHive23) { + assert(names === ColumnProjectionUtils.getReadColumnNames(conf)) + } else { + assert(names.asJava === ColumnProjectionUtils.getReadColumnNames(conf)) + } // test when READ_COLUMN_NAMES_CONF_STR is non-empty HiveShim.appendReadColumns(conf, moreIds, moreNames) - assert((names ++ moreNames).asJava === ColumnProjectionUtils.getReadColumnNames(conf)) + if (HiveUtils.isHive23) { + assert((names ++ moreNames) === ColumnProjectionUtils.getReadColumnNames(conf)) + } else { + assert((names ++ moreNames).asJava === 
ColumnProjectionUtils.getReadColumnNames(conf)) + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 630f02c..81cac9d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -100,8 +100,14 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto .asInstanceOf[HiveTableRelation] val properties = relation.tableMeta.ignoredProperties - assert(properties("totalSize").toLong <= 0, "external table totalSize must be <= 0") - assert(properties("rawDataSize").toLong <= 0, "external table rawDataSize must be <= 0") + if (HiveUtils.isHive23) { + // Since HIVE-6727, Hive fixes table-level stats for external tables are incorrect. + assert(properties("totalSize").toLong == 6) + assert(properties.get("rawDataSize").isEmpty) + } else { + assert(properties("totalSize").toLong <= 0, "external table totalSize must be <= 0") + assert(properties("rawDataSize").toLong <= 0, "external table rawDataSize must be <= 0") + } val sizeInBytes = relation.stats.sizeInBytes assert(sizeInBytes === BigInt(file1.length() + file2.length())) @@ -865,17 +871,25 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto val totalSize = extractStatsPropValues(describeResult, "totalSize") assert(totalSize.isDefined && totalSize.get > 0, "totalSize is lost") - // ALTER TABLE SET/UNSET TBLPROPERTIES invalidates some Hive specific statistics, but not - // Spark specific statistics. This is triggered by the Hive alterTable API. 
val numRows = extractStatsPropValues(describeResult, "numRows") - assert(numRows.isDefined && numRows.get == -1, "numRows is lost") - val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") - assert(rawDataSize.isDefined && rawDataSize.get == -1, "rawDataSize is lost") - - if (analyzedBySpark) { + if (HiveUtils.isHive23) { + // Since HIVE-15653(Hive 2.3.0), Hive fixs some ALTER TABLE commands drop table stats. + assert(numRows.isDefined && numRows.get == 500) + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + assert(rawDataSize.isDefined && rawDataSize.get == 5312) checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) } else { - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = None) + // ALTER TABLE SET/UNSET TBLPROPERTIES invalidates some Hive specific statistics, but not + // Spark specific statistics. This is triggered by the Hive alterTable API. + assert(numRows.isDefined && numRows.get == -1, "numRows is lost") + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + assert(rawDataSize.isDefined && rawDataSize.get == -1, "rawDataSize is lost") + + if (analyzedBySpark) { + checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) + } else { + checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = None) + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala index 5094763..6de47a6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} import 
org.apache.spark.sql.execution.datasources.orc.OrcTest +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types._ @@ -76,15 +77,27 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { checkFilterPredicate(df, predicate, checkComparisonOperator) } - private def checkFilterPredicate + private def checkFilterPredicateWithDiffHiveVersion (predicate: Predicate, stringExpr: String) (implicit df: DataFrame): Unit = { def checkLogicalOperator(filter: SearchArgument) = { - assert(filter.toString == stringExpr) + if (HiveUtils.isHive23) { + assert(filter.toString == stringExpr.replace("\n", ", ")) + } else { + assert(filter.toString == stringExpr) + } } checkFilterPredicate(df, predicate, checkLogicalOperator) } + private def assertResultWithDiffHiveVersion(expected : String)(c : scala.Any) = { + if (HiveUtils.isHive23) { + assertResult(expected.replace("\n", ", "))(c) + } else { + assertResult(expected)(c) + } + } + private def checkNoFilterPredicate (predicate: Predicate) (implicit df: DataFrame): Unit = { @@ -295,30 +308,30 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { // such as `and`, `or` or `not`. So, this function uses `SearchArgument.toString()` // to produce string expression and then compare it to given string expression below. // This might have to be changed after Hive version is upgraded. 
- checkFilterPredicate( + checkFilterPredicateWithDiffHiveVersion( '_1.isNotNull, """leaf-0 = (IS_NULL _1) |expr = (not leaf-0)""".stripMargin.trim ) - checkFilterPredicate( + checkFilterPredicateWithDiffHiveVersion( '_1 =!= 1, """leaf-0 = (IS_NULL _1) |leaf-1 = (EQUALS _1 1) |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim ) - checkFilterPredicate( + checkFilterPredicateWithDiffHiveVersion( !('_1 < 4), """leaf-0 = (IS_NULL _1) |leaf-1 = (LESS_THAN _1 4) |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim ) - checkFilterPredicate( + checkFilterPredicateWithDiffHiveVersion( '_1 < 2 || '_1 > 3, """leaf-0 = (LESS_THAN _1 2) |leaf-1 = (LESS_THAN_EQUALS _1 3) |expr = (or leaf-0 (not leaf-1))""".stripMargin.trim ) - checkFilterPredicate( + checkFilterPredicateWithDiffHiveVersion( '_1 < 2 && '_1 > 3, """leaf-0 = (IS_NULL _1) |leaf-1 = (LESS_THAN _1 2) @@ -341,9 +354,11 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { checkNoFilterPredicate('_1 <=> 1.b) } // DateType - val stringDate = "2015-01-01" - withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df => - checkNoFilterPredicate('_1 === Date.valueOf(stringDate)) + if (!HiveUtils.isHive23) { + val stringDate = "2015-01-01" + withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df => + checkNoFilterPredicate('_1 === Date.valueOf(stringDate)) + } } // MapType withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => @@ -358,7 +373,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { Array( StructField("a", IntegerType, nullable = true), StructField("b", StringType, nullable = true))) - assertResult( + assertResultWithDiffHiveVersion( """leaf-0 = (LESS_THAN a 10) |expr = leaf-0 """.stripMargin.trim @@ -370,7 +385,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { } // The `LessThan` should be converted while the whole inner `And` shouldn't - assertResult( + assertResultWithDiffHiveVersion( """leaf-0 = 
(LESS_THAN a 10) |expr = leaf-0 """.stripMargin.trim @@ -396,7 +411,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { )).isEmpty) // Safely remove unsupported `StringContains` predicate and push down `LessThan` - assertResult( + assertResultWithDiffHiveVersion( """leaf-0 = (LESS_THAN a 10) |expr = leaf-0 """.stripMargin.trim @@ -410,7 +425,7 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { } // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`. - assertResult( + assertResultWithDiffHiveVersion( """leaf-0 = (LESS_THAN a 10) |leaf-1 = (LESS_THAN_EQUALS a 1) |expr = (and leaf-0 (not leaf-1)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index c46512b..6bcb222 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -149,7 +149,12 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("Check BloomFilter creation") { Seq(true, false).foreach { convertMetastore => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { - testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER) // Before ORC-101 + if (HiveUtils.isHive23) { + testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER_UTF8) + } else { + // Before ORC-101 + testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER) + } } } } @@ -157,7 +162,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("Enforce direct encoding column-wise selectively") { Seq(true, false).foreach { convertMetastore => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { - testSelectiveDictionaryEncoding(isSelective = false) + testSelectiveDictionaryEncoding(isSelective = false, 
isHive23 = HiveUtils.isHive23) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org