[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r240022467 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala --- @@ -222,4 +222,61 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { --- End diff -- Please fix this first for the first and second review comments. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r240014393 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala --- @@ -190,4 +192,103 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + val dataDir = new File(s"${dir.getCanonicalPath}/l3/l2/l1/").toURI + val parentDir = s"${dir.getCanonicalPath}/l3/l2/" + val l3Dir = s"${dir.getCanonicalPath}/l3/" + val wildcardParentDir = new File(s"${dir}/l3/l2/*").toURI + val wildcardL3Dir = new File(s"${dir}/l3/*").toURI + + try { +hiveClient.runSqlHive("USE default") +hiveClient.runSqlHive( + """ +|CREATE EXTERNAL TABLE hive_orc( +| C1 INT, +| C2 INT, +| C3 STRING) +|STORED AS orc""".stripMargin) +// Hive throws an exception if I assign the location in the create table statement. 
+hiveClient.runSqlHive( + s"ALTER TABLE hive_orc SET LOCATION '$dataDir'") +hiveClient.runSqlHive( + """ +|INSERT INTO TABLE hive_orc +|VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin) + +withTable("tbl1", "tbl2", "tbl3", "tbl4") { + val parentDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${parentDir}'""".stripMargin + sql(parentDirStatement) + val parentDirSqlStatement = s"select * from tbl1" + if (convertMetastore) { +checkAnswer(sql(parentDirSqlStatement), Nil) + } else { +checkAnswer(sql(parentDirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } + + val l3DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${l3Dir}'""".stripMargin + sql(l3DirStatement) + val l3DirSqlStatement = s"select * from tbl2" + if (convertMetastore) { +checkAnswer(sql(l3DirSqlStatement), Nil) + } else { +checkAnswer(sql(l3DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } + + val wildcardStatement = +s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '$wildcardParentDir'""".stripMargin + sql(wildcardStatement) + val wildcardSqlStatement = s"select * from tbl3" + if (convertMetastore) { +checkAnswer(sql(wildcardSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(wildcardSqlStatement), Nil) + } + + val wildcardL3Statement = +s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '$wildcardL3Dir'""".stripMargin + sql(wildcardL3Statement) + val wildcardL3SqlStatement = s"select * from tbl4" + checkAnswer(sql(wildcardL3SqlStatement), Nil) +} + } finally { +hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc") --- End diff -- Got it. I missed that. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239663711 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala --- @@ -222,4 +223,66 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/" +val parentDir = s"${path.getCanonicalPath}/l3/l2/" +val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI +val wildcardL3Dir = new File(s"${path}/l3/*").toURI +someDF1.write.parquet(dataDir) +val parentDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${parentDir}'""".stripMargin +sql(parentDirStatement) +val wildcardStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${wildcardParentDir}'""".stripMargin +sql(wildcardStatement) +val wildcardL3Statement = + s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${wildcardL3Dir}'""".stripMargin +sql(wildcardL3Statement) + +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl1"), Nil) + checkAnswer(sql("select * from tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) + checkAnswer(sql("select * from tbl3"), Nil) +} else { + Seq("select * from tbl1", "select * from tbl2", "select * from tbl3").foreach { +sqlStmt => + try { +sql(sqlStmt) + } catch { +case e: IOException => + assert(e.getMessage().contains("java.io.IOException: Not a file")) + } --- 
End diff -- you are right, I will make changes. Thanks. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239635890 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala --- @@ -190,4 +192,103 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + val dataDir = new File(s"${dir.getCanonicalPath}/l3/l2/l1/").toURI + val parentDir = s"${dir.getCanonicalPath}/l3/l2/" + val l3Dir = s"${dir.getCanonicalPath}/l3/" + val wildcardParentDir = new File(s"${dir}/l3/l2/*").toURI + val wildcardL3Dir = new File(s"${dir}/l3/*").toURI + + try { +hiveClient.runSqlHive("USE default") +hiveClient.runSqlHive( + """ +|CREATE EXTERNAL TABLE hive_orc( +| C1 INT, +| C2 INT, +| C3 STRING) +|STORED AS orc""".stripMargin) +// Hive throws an exception if I assign the location in the create table statement. 
+hiveClient.runSqlHive( + s"ALTER TABLE hive_orc SET LOCATION '$dataDir'") +hiveClient.runSqlHive( + """ +|INSERT INTO TABLE hive_orc +|VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin) + +withTable("tbl1", "tbl2", "tbl3", "tbl4") { + val parentDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${parentDir}'""".stripMargin + sql(parentDirStatement) + val parentDirSqlStatement = s"select * from tbl1" + if (convertMetastore) { +checkAnswer(sql(parentDirSqlStatement), Nil) + } else { +checkAnswer(sql(parentDirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } + + val l3DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${l3Dir}'""".stripMargin + sql(l3DirStatement) + val l3DirSqlStatement = s"select * from tbl2" + if (convertMetastore) { +checkAnswer(sql(l3DirSqlStatement), Nil) + } else { +checkAnswer(sql(l3DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } + + val wildcardStatement = +s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '$wildcardParentDir'""".stripMargin + sql(wildcardStatement) + val wildcardSqlStatement = s"select * from tbl3" + if (convertMetastore) { +checkAnswer(sql(wildcardSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(wildcardSqlStatement), Nil) + } + + val wildcardL3Statement = +s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '$wildcardL3Dir'""".stripMargin + sql(wildcardL3Statement) + val wildcardL3SqlStatement = s"select * from tbl4" + checkAnswer(sql(wildcardL3SqlStatement), Nil) +} + } finally { +hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc") --- End diff -- @dongjoon-hyun at line 221, I put the `tbl1` ~ `tbl4` with the `withTable`, I think it will get dropped. I tried to run it couple time in intellij, it seems work fine. 
what do you think? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239580991 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala --- @@ -32,6 +32,7 @@ import org.apache.spark.util.Utils class HiveParquetSourceSuite extends ParquetPartitioningTest { import testImplicits._ import spark._ + import java.io.IOException --- End diff -- ok --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239577623 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala --- @@ -73,9 +73,11 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { sql( s""" |CREATE TABLE $tableName + |USING org.apache.spark.sql.hive.orc |OPTIONS ( - | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + | PATH '${new File(orcTableAsDir.getAbsolutePath +).toURI}' --- End diff -- good catch, I didn't notice my changes affected the formatting in the file. I have reverted the change. Thanks --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239365705 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala --- @@ -32,6 +32,7 @@ import org.apache.spark.util.Utils class HiveParquetSourceSuite extends ParquetPartitioningTest { import testImplicits._ import spark._ + import java.io.IOException --- End diff -- This had better go to line 21. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239365367 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala --- @@ -190,4 +192,103 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + val dataDir = new File(s"${dir.getCanonicalPath}/l3/l2/l1/").toURI + val parentDir = s"${dir.getCanonicalPath}/l3/l2/" + val l3Dir = s"${dir.getCanonicalPath}/l3/" + val wildcardParentDir = new File(s"${dir}/l3/l2/*").toURI + val wildcardL3Dir = new File(s"${dir}/l3/*").toURI + + try { +hiveClient.runSqlHive("USE default") +hiveClient.runSqlHive( + """ +|CREATE EXTERNAL TABLE hive_orc( +| C1 INT, +| C2 INT, +| C3 STRING) +|STORED AS orc""".stripMargin) +// Hive throws an exception if I assign the location in the create table statement. 
+hiveClient.runSqlHive( + s"ALTER TABLE hive_orc SET LOCATION '$dataDir'") +hiveClient.runSqlHive( + """ +|INSERT INTO TABLE hive_orc +|VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin) + +withTable("tbl1", "tbl2", "tbl3", "tbl4") { + val parentDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${parentDir}'""".stripMargin + sql(parentDirStatement) + val parentDirSqlStatement = s"select * from tbl1" + if (convertMetastore) { +checkAnswer(sql(parentDirSqlStatement), Nil) + } else { +checkAnswer(sql(parentDirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } + + val l3DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${l3Dir}'""".stripMargin + sql(l3DirStatement) + val l3DirSqlStatement = s"select * from tbl2" + if (convertMetastore) { +checkAnswer(sql(l3DirSqlStatement), Nil) + } else { +checkAnswer(sql(l3DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } + + val wildcardStatement = +s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '$wildcardParentDir'""".stripMargin + sql(wildcardStatement) + val wildcardSqlStatement = s"select * from tbl3" + if (convertMetastore) { +checkAnswer(sql(wildcardSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(wildcardSqlStatement), Nil) + } + + val wildcardL3Statement = +s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '$wildcardL3Dir'""".stripMargin + sql(wildcardL3Statement) + val wildcardL3SqlStatement = s"select * from tbl4" + checkAnswer(sql(wildcardL3SqlStatement), Nil) +} + } finally { +hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc") --- End diff -- It seems that we need to clean up `tbl1` ~ `tbl4`, too. 
--- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239364631 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala --- @@ -222,4 +223,66 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/" +val parentDir = s"${path.getCanonicalPath}/l3/l2/" +val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI +val wildcardL3Dir = new File(s"${path}/l3/*").toURI +someDF1.write.parquet(dataDir) +val parentDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${parentDir}'""".stripMargin +sql(parentDirStatement) +val wildcardStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${wildcardParentDir}'""".stripMargin +sql(wildcardStatement) +val wildcardL3Statement = + s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${wildcardL3Dir}'""".stripMargin +sql(wildcardL3Statement) + +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl1"), Nil) + checkAnswer(sql("select * from tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) + checkAnswer(sql("select * from tbl3"), Nil) +} else { + Seq("select * from tbl1", "select * from tbl2", "select * from tbl3").foreach { +sqlStmt => + try { +sql(sqlStmt) + } catch { +case e: IOException => + assert(e.getMessage().contains("java.io.IOException: Not a file")) + } 
--- End diff -- @kevinyu98 . Is this testing exceptions for all three of the above SQLs? We use `intercept[IOException]` to test expected `Exception`s. However, this is not a robust test case, because there is no `assert(false)` after `sql(sqlStmt)`. We need to check the individual query failure and success exactly for the specific configuration. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239361792 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala --- @@ -73,9 +73,11 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { sql( s""" |CREATE TABLE $tableName + |USING org.apache.spark.sql.hive.orc |OPTIONS ( - | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + | PATH '${new File(orcTableAsDir.getAbsolutePath +).toURI}' --- End diff -- The above change in line 76 ~ 80 looks strange and irrelevant. Let's revert this change. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239266908 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala --- @@ -186,6 +186,82 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } + protected def testORCTableLocation(isConvertMetastore: Boolean): Unit = { --- End diff -- ok, I moved. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239266720 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { --- End diff -- sure, changed. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239266747 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala --- @@ -190,4 +190,12 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } } } + + test("SPARK-25993 Add test cases for resolution of ORC table location") { --- End diff -- Changed. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239266791 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { +withTempPath { path => +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).toDF("c1", "c2", "c3").repartition(1) +withTable("tbl1", "tbl2", "tbl3") { +val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/" --- End diff -- fixed --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239266816 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { +withTempPath { path => +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).toDF("c1", "c2", "c3").repartition(1) +withTable("tbl1", "tbl2", "tbl3") { +val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/" +val parentDir = s"${path.getCanonicalPath}/l3/l2/" +val l3Dir = s"${path.getCanonicalPath}/l3/" +val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI +val wildcardL3Dir = new File(s"${path}/l3/*").toURI +someDF1.write.parquet(dataDir) +val parentDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${parentDir}'""".stripMargin +sql(parentDirStatement) +checkAnswer(sql("select * from tbl1"), Nil) + +val wildcardStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${wildcardParentDir}'""".stripMargin +sql(wildcardStatement) +checkAnswer(sql("select * from tbl2"), + (1 to 2).map(i => Row(i, i, s"parq$i"))) + +val wildcardL3Statement = +s""" --- End diff -- fixed --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239266775 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { +withTempPath { path => +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).toDF("c1", "c2", "c3").repartition(1) --- End diff -- fixed. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user kevinyu98 commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r239266673 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala --- @@ -186,6 +186,54 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } + protected def testORCTableLocation(isConvertMetastore: Boolean): Unit = { +val tableName1 = "spark_orc1" +val tableName2 = "spark_orc2" + +withTempDir { dir => + val someDF1 = Seq((1, 1, "orc1"), (2, 2, "orc2")).toDF("c1", "c2", "c3").repartition(1) + withTable(tableName1, tableName2) { +val dataDir = s"${dir.getCanonicalPath}/dir1/" +val parentDir = s"${dir.getCanonicalPath}/" +val wildCardDir = new File(s"${dir}/*").toURI +someDF1.write.orc(dataDir) +val parentDirStatement = + s""" + |CREATE EXTERNAL TABLE $tableName1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${parentDir}'""".stripMargin +sql(parentDirStatement) +val parentDirSqlStatement = s"select * from ${tableName1}" +if (isConvertMetastore) { + checkAnswer(sql(parentDirSqlStatement), Nil) +} else { + checkAnswer(sql(parentDirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) +} + +val wildCardStatement = + s""" + |CREATE EXTERNAL TABLE $tableName2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '$wildCardDir'""".stripMargin --- End diff -- done --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238944485 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala --- @@ -186,6 +186,82 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } + protected def testORCTableLocation(isConvertMetastore: Boolean): Unit = { --- End diff -- Since this test helper function is only used in `HiveOrcSourceSuite`, can we move this into `HiveOrcSourceSuite`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238944132 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { +withTempPath { path => +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).toDF("c1", "c2", "c3").repartition(1) +withTable("tbl1", "tbl2", "tbl3") { +val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/" +val parentDir = s"${path.getCanonicalPath}/l3/l2/" +val l3Dir = s"${path.getCanonicalPath}/l3/" +val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI +val wildcardL3Dir = new File(s"${path}/l3/*").toURI +someDF1.write.parquet(dataDir) +val parentDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${parentDir}'""".stripMargin +sql(parentDirStatement) +checkAnswer(sql("select * from tbl1"), Nil) + +val wildcardStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${wildcardParentDir}'""".stripMargin +sql(wildcardStatement) +checkAnswer(sql("select * from tbl2"), + (1 to 2).map(i => Row(i, i, s"parq$i"))) + +val wildcardL3Statement = +s""" --- End diff -- indentation? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238944067 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { +withTempPath { path => +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).toDF("c1", "c2", "c3").repartition(1) --- End diff -- Indentation. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238944097 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { +withTempPath { path => +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).toDF("c1", "c2", "c3").repartition(1) +withTable("tbl1", "tbl2", "tbl3") { +val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/" --- End diff -- indentation? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238943983 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { --- End diff -- Also, for the full test coverage, can we have the following combination like ORC, too? ``` Seq(true, false).foreach { convertMetastore => ``` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238943694 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala --- @@ -190,4 +190,12 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } } } + + test("SPARK-25993 Add test cases for resolution of ORC table location") { --- End diff -- Please change this to `CREATE EXTERNAL TABLE with subdirectories`, too. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238943607 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { --- End diff -- Also, let's replace the test case name with `CREATE EXTERNAL TABLE with subdirectories`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for CREATE...
Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/23108#discussion_r238943270 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala --- @@ -2370,4 +2370,51 @@ class HiveDDLSuite )) } } + + test("SPARK-25993 Add test cases for resolution of Parquet table location") { --- End diff -- Maybe, `HiveParquetSourceSuite`? That's the similar one with `OrcSourceSuite`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org