Github user dongjoon-hyun commented on a diff in the pull request:
https://github.com/apache/spark/pull/23108#discussion_r237337683
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
---
@@ -186,6 +186,54 @@ abstract class OrcSuite extends OrcTest with
BeforeAndAfterAll {
}
}
+ protected def testORCTableLocation(isConvertMetastore: Boolean): Unit = {
+ val tableName1 = "spark_orc1"
+ val tableName2 = "spark_orc2"
+
+ withTempDir { dir =>
+ val someDF1 = Seq((1, 1, "orc1"), (2, 2, "orc2")).toDF("c1", "c2",
"c3").repartition(1)
+ withTable(tableName1, tableName2) {
+ val dataDir = s"${dir.getCanonicalPath}/dir1/"
+ val parentDir = s"${dir.getCanonicalPath}/"
+ val wildCardDir = new File(s"${dir}/*").toURI
+ someDF1.write.orc(dataDir)
+ val parentDirStatement =
+ s"""
+ |CREATE EXTERNAL TABLE $tableName1(
+ | c1 int,
+ | c2 int,
+ | c3 string)
+ |STORED AS orc
+ |LOCATION '${parentDir}'""".stripMargin
+ sql(parentDirStatement)
+ val parentDirSqlStatement = s"select * from ${tableName1}"
+ if (isConvertMetastore) {
+ checkAnswer(sql(parentDirSqlStatement), Nil)
+ } else {
+ checkAnswer(sql(parentDirSqlStatement),
+ (1 to 2).map(i => Row(i, i, s"orc$i")))
+ }
+
+ val wildCardStatement =
+ s"""
+ |CREATE EXTERNAL TABLE $tableName2(
+ | c1 int,
+ | c2 int,
+ | c3 string)
+ |STORED AS orc
+ |LOCATION '$wildCardDir'""".stripMargin
--- End diff --
I have two suggestions.
1. Is this PR aiming to support only one-level subdirectories? Could you check the
behavior with one-, two-, and three-level subdirectories in Parquet Hive tables first?
2. Since the test case looks general enough to cover both Parquet and ORC, please
add a test case for Parquet while you are here.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]