LuciferYang commented on a change in pull request #35335:
URL: https://github.com/apache/spark/pull/35335#discussion_r793219289



##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
##########
@@ -1,1159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive.client
-
-import java.io.{ByteArrayOutputStream, File, PrintStream, PrintWriter}
-import java.net.URI
-
-import org.apache.commons.lang3.{JavaVersion, SystemUtils}
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.hive.common.StatsSetupConst
-import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
-import org.apache.hadoop.mapred.TextInputFormat
-import org.apache.hadoop.security.UserGroupInformation
-
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.{AnalysisException, Row}
-import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
-import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, NoSuchDatabaseException, NoSuchPermanentFunctionException, PartitionsAlreadyExistException}
-import org.apache.spark.sql.catalyst.catalog._
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal}
-import org.apache.spark.sql.catalyst.util.quietly
-import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
-import org.apache.spark.sql.hive.test.TestHiveVersion
-import org.apache.spark.sql.types.IntegerType
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.tags.{ExtendedHiveTest, SlowHiveTest}
-import org.apache.spark.util.{MutableURLClassLoader, Utils}
-
-/**
- * A simple set of tests that call the methods of a [[HiveClient]], loading different versions
- * of Hive from Maven Central. These tests are simple in that they mostly just check that
- * reflective calls are not throwing NoSuchMethod errors, but the actual functionality is
- * not fully tested.
- */
-// TODO: Refactor this to `HiveClientSuite` and make it a subclass of `HiveVersionSuite`
-@SlowHiveTest
-@ExtendedHiveTest
-class VersionsSuite extends SparkFunSuite with Logging {
-
-  override protected val enableAutoThreadAudit = false
-
-  import HiveClientBuilder.buildClient
-
-  /**
-   * Drops the tables in `tableNames` after calling `f`.
-   */
-  protected def withTable(tableNames: String*)(f: => Unit): Unit = {
-    try f finally {
-      tableNames.foreach { name =>
-        versionSpark.sql(s"DROP TABLE IF EXISTS $name")
-      }
-    }
-  }
-
-  test("success sanity check") {
-    val badClient = buildClient(HiveUtils.builtinHiveVersion, new Configuration())
-    val db = new CatalogDatabase("default", "desc", new URI("loc"), Map())
-    badClient.createDatabase(db, ignoreIfExists = true)
-  }
-
-  test("hadoop configuration preserved") {
-    val hadoopConf = new Configuration()
-    hadoopConf.set("test", "success")
-    val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf)
-    assert("success" === client.getConf("test", null))
-  }
-
-  test("override useless and side-effect hive configurations ") {
-    val hadoopConf = new Configuration()
-    // These hive flags should be reset by spark
-    hadoopConf.setBoolean("hive.cbo.enable", true)
-    hadoopConf.setBoolean("hive.session.history.enabled", true)
-    hadoopConf.set("hive.execution.engine", "tez")
-    val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf)
-    assert(!client.getConf("hive.cbo.enable", "true").toBoolean)
-    assert(!client.getConf("hive.session.history.enabled", "true").toBoolean)
-    assert(client.getConf("hive.execution.engine", "tez") === "mr")
-  }
-
-  private def getNestedMessages(e: Throwable): String = {
-    var causes = ""
-    var lastException = e
-    while (lastException != null) {
-      causes += lastException.toString + "\n"
-      lastException = lastException.getCause
-    }
-    causes
-  }
-
-  private val emptyDir = Utils.createTempDir().getCanonicalPath
-
-  // It's actually pretty easy to mess things up and have all of your tests "pass" by accidentally
-  // connecting to an auto-populated, in-process metastore.  Let's make sure we are getting the
-  // versions right by forcing a known compatibility failure.
-  // TODO: currently only works on mysql where we manually create the schema...
-  ignore("failure sanity check") {
-    val e = intercept[Throwable] {
-      val badClient = quietly { buildClient("13", new Configuration()) }
-    }
-    assert(getNestedMessages(e) contains "Unknown column 'A0.OWNER_NAME' in 'field list'")
-  }
-
-  private val versions = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
-    Seq("2.0", "2.1", "2.2", "2.3", "3.0", "3.1")
-  } else {
-    Seq("0.12", "0.13", "0.14", "1.0", "1.1", "1.2", "2.0", "2.1", "2.2", "2.3", "3.0", "3.1")
-  }
-
-  private var client: HiveClient = null
-
-  private var versionSpark: TestHiveVersion = null
-
-  versions.foreach { version =>
-    test(s"$version: create client") {
-      client = null
-      System.gc() // Hack to avoid SEGV on some JVM versions.
-      val hadoopConf = new Configuration()
-      hadoopConf.set("test", "success")
-      // Hive changed the default of datanucleus.schema.autoCreateAll from true to false and
-      // hive.metastore.schema.verification from false to true since 2.0
-      // For details, see the JIRA HIVE-6113 and HIVE-12463
-      if (version == "2.0" || version == "2.1" || version == "2.2" || version == "2.3" ||
-          version == "3.0" || version == "3.1") {
-        hadoopConf.set("datanucleus.schema.autoCreateAll", "true")
-        hadoopConf.set("hive.metastore.schema.verification", "false")
-      }
-      if (version == "3.0" || version == "3.1") {
-        // Since Hive 3.0, HIVE-19310 skipped `ensureDbInit` if `hive.in.test=false`.
-        hadoopConf.set("hive.in.test", "true")
-        // Since HIVE-17626 (Hive 3.0.0), need to set hive.query.reexecution.enabled=false.
-        hadoopConf.set("hive.query.reexecution.enabled", "false")

Review comment:
       Move `hadoopConf.set("hive.query.reexecution.enabled", "false")` to `HiveVersionSuite.scala` as a default configuration.
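   For illustration, a minimal sketch of the suggested refactor, assuming `HiveVersionSuite` keeps a `buildClient(version, hadoopConf)` helper (the class shape and method signature here are illustrative, not the actual Spark source):

   ```scala
   package org.apache.spark.sql.hive.client

   import org.apache.hadoop.conf.Configuration

   import org.apache.spark.SparkFunSuite

   private[client] abstract class HiveVersionSuite extends SparkFunSuite {
     protected def buildClient(version: String, hadoopConf: Configuration): HiveClient = {
       // Since HIVE-17626 (Hive 3.0.0), query re-execution must be disabled in these
       // tests. Setting the flag here makes it the default for every tested version
       // (older Hive versions simply ignore the unknown key) and removes the need to
       // set it in each per-version test body.
       hadoopConf.set("hive.query.reexecution.enabled", "false")
       HiveClientBuilder.buildClient(version, hadoopConf)
     }
   }
   ```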
   
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


