[GitHub] spark pull request #19148: [SPARK-21936][SQL][WIP] backward compatibility te...

dongjoon-hyun Wed, 06 Sep 2017 16:05:43 -0700

Github user dongjoon-hyun commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19148#discussion_r137412267
  
    --- Diff: 
sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
 ---
    @@ -0,0 +1,199 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.hive
    +
    +import java.io.File
    +import java.sql.Timestamp
    +import java.util.Date
    +
    +import scala.collection.mutable.ArrayBuffer
    +
    +import org.scalatest.concurrent.Timeouts
    +import org.scalatest.exceptions.TestFailedDueToTimeoutException
    +import org.scalatest.time.SpanSugar._
    +
    +import org.apache.spark.{SparkFunSuite, TestUtils}
    +import org.apache.spark.sql.{QueryTest, Row, SparkSession}
    +import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer
    +import org.apache.spark.util.Utils
    +
    +/**
    + * Test HiveExternalCatalog backward compatibility.
    + *
    + * Note that this test suite will automatically download Spark binary packages of different
    + * versions to a local directory `/tmp/spark-test`. If there is already a Spark folder with the
    + * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will
    + * skip the download for this Spark version.
    + */
    +class HiveExternalCatalogVersionsSuite extends SparkFunSuite with Timeouts 
{
    +  // Temporary directory created with the "warehouse" name prefix; removed again in afterAll().
    +  private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse")
    +  // Local directory under which the downloaded Spark binary packages are kept (see class doc).
    +  private val sparkTestingDir = "/tmp/spark-test"
    +  // Jar built from an empty class list. NOTE(review): its use is outside this excerpt --
    +  // presumably a placeholder application jar for spark-submit; confirm.
    +  private val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
    +
    +  /**
    +   * Deletes the temporary warehouse directory, then delegates to `super.afterAll()`.
    +   *
    +   * The superclass cleanup runs in a `finally` block so that it is not skipped when
    +   * deleting the directory throws.
    +   */
    +  override def afterAll(): Unit = {
    +    try {
    +      Utils.deleteRecursively(wareHousePath)
    +    } finally {
    +      super.afterAll()
    +    }
    +  }
    +
    +  /**
    +   * Runs `spark-submit` with the given arguments in a child process and fails the test if the
    +   * process exits with a non-zero code or does not finish within 300 seconds.
    +   *
    +   * NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly.
    +   * This is copied from org.apache.spark.deploy.SparkSubmitSuite
    +   *
    +   * @param args arguments appended after the `spark-submit` executable
    +   * @param sparkHomeOpt Spark home to run from; when empty, the `spark.test.home` system
    +   *                     property is used and the test fails if that property is not set
    +   */
    +  private def runSparkSubmit(args: Seq[String], sparkHomeOpt: Option[String] = None): Unit = {
    +    val sparkHome = sparkHomeOpt.getOrElse(
    +      sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!")))
    +    // Appended to by both output capturers started below (stdout and stderr) and read by the
    +    // test thread when building a failure message, so every access is guarded by its monitor.
    +    val history = ArrayBuffer.empty[String]
    +    def historySnapshot: String = history.synchronized { history.mkString("\n") }
    +
    +    val sparkSubmit = if (Utils.isWindows) {
    +      // On Windows, `ProcessBuilder.directory` does not change the current working directory.
    +      new File("..\\..\\bin\\spark-submit.cmd").getAbsolutePath
    +    } else {
    +      "./bin/spark-submit"
    +    }
    +    val commands = Seq(sparkSubmit) ++ args
    +    val commandLine = commands.mkString("'", "' '", "'")
    +
    +    val builder = new ProcessBuilder(commands: _*).directory(new File(sparkHome))
    +    val env = builder.environment()
    +    env.put("SPARK_TESTING", "1")
    +    env.put("SPARK_HOME", sparkHome)
    +
    +    def captureOutput(source: String)(line: String): Unit = {
    +      // This test suite has some weird behaviors when executed on Jenkins:
    +      //
    +      // 1. Sometimes it gets extremely slow out of unknown reason on Jenkins.  Here we add a
    +      //    timestamp to provide more diagnosis information.
    +      // 2. Log lines are not correctly redirected to unit-tests.log as expected, so here we
    +      //    print them out for debugging purposes.
    +      val logLine = s"${new Timestamp(new Date().getTime)} - $source> $line"
    +      // scalastyle:off println
    +      println(logLine)
    +      // scalastyle:on println
    +      history.synchronized { history += logLine }
    +    }
    +
    +    val process = builder.start()
    +    new ProcessOutputCapturer(process.getInputStream, captureOutput("stdout")).start()
    +    new ProcessOutputCapturer(process.getErrorStream, captureOutput("stderr")).start()
    +
    +    try {
    +      val exitCode = failAfter(300.seconds) { process.waitFor() }
    +      if (exitCode != 0) {
    +        // include logs in output. Note that logging is async and may not have completed
    +        // at the time this exception is raised
    +        Thread.sleep(1000)
    +        val historyLog = historySnapshot
    +        fail {
    +          s"""spark-submit returned with exit code $exitCode.
    +             |Command line: $commandLine
    +             |
    +             |$historyLog
    +           """.stripMargin
    +        }
    +      }
    +    } catch {
    +      // Only the timeout is translated; any other exception propagates unchanged.
    +      case to: TestFailedDueToTimeoutException =>
    +        val historyLog = historySnapshot
    +        fail(s"Timeout of $commandLine" +
    +          s" See the log4j logs for more detail." +
    +          s"\n$historyLog", to)
    +    } finally {
    +      // Ensure we still kill the process in case it timed out
    +      process.destroy()
    +    }
    +  }
    +
    +  private def downloadSpark(version: String): Unit = {
    +    import scala.sys.process._
    +
    +    val url =
    +      
s"http://mirrors.hust.edu.cn/apache/spark/spark-$version/spark-$version-bin-hadoop2.7.tgz";
    --- End diff --
    
    It's great to have this. Is it okay to use this single site?



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #19148: [SPARK-21936][SQL][WIP] backward compatibility te...

Reply via email to