[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r368087150 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,166 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +val orcTblStatement1 = + s""" + |CREATE EXTERNAL TABLE orc_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin +sql(orcTblStatement1) + +val orcTblInsertL1 = + s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin +sql(orcTblInsertL1) + +val orcTblStatement2 = +s""" + |CREATE EXTERNAL TABLE orc_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(orcTblStatement2) + +val orcTblInsertL2 = + s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin +sql(orcTblInsertL2) + +val orcTblStatement3 = +s""" + |CREATE EXTERNAL TABLE orc_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin +sql(orcTblStatement3) + +val orcTblInsertL3 = + s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin +sql(orcTblInsertL3) + +withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + val topDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION 
'${s"${dir.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + val topDirSqlStatement = s"SELECT * FROM tbl1" + if (convertMetastore) { +checkAnswer(sql(topDirSqlStatement), Nil) + } else { +checkAnswer(sql(topDirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l1DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + val l1DirSqlStatement = s"SELECT * FROM tbl2" + if (convertMetastore) { +checkAnswer(sql(l1DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l1DirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l2DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(l2DirStatement) + val l2DirSqlStatement = s"SELECT * FROM tbl3" + if (convertMetastore) { +checkAnswer(sql(l2DirSqlStatement), + (3 to 4).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l2DirSqlStatement), + (3 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val wildcardTopDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin + sql(wildcardTopDirStatement) + val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4" + if (convertMetastore)
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r368083946 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,166 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +val orcTblStatement1 = + s""" + |CREATE EXTERNAL TABLE orc_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin +sql(orcTblStatement1) + +val orcTblInsertL1 = + s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin +sql(orcTblInsertL1) + +val orcTblStatement2 = +s""" + |CREATE EXTERNAL TABLE orc_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(orcTblStatement2) + +val orcTblInsertL2 = + s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin +sql(orcTblInsertL2) + +val orcTblStatement3 = +s""" + |CREATE EXTERNAL TABLE orc_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin +sql(orcTblStatement3) + +val orcTblInsertL3 = + s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin +sql(orcTblInsertL3) + +withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + val topDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION 
'${s"${dir.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + val topDirSqlStatement = s"SELECT * FROM tbl1" + if (convertMetastore) { +checkAnswer(sql(topDirSqlStatement), Nil) + } else { +checkAnswer(sql(topDirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l1DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + val l1DirSqlStatement = s"SELECT * FROM tbl2" + if (convertMetastore) { +checkAnswer(sql(l1DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l1DirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l2DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(l2DirStatement) + val l2DirSqlStatement = s"SELECT * FROM tbl3" + if (convertMetastore) { +checkAnswer(sql(l2DirSqlStatement), + (3 to 4).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l2DirSqlStatement), + (3 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val wildcardTopDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin + sql(wildcardTopDirStatement) + val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4" + if (convertMetastore)
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r368083822 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,166 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +val orcTblStatement1 = + s""" + |CREATE EXTERNAL TABLE orc_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin +sql(orcTblStatement1) + +val orcTblInsertL1 = + s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin +sql(orcTblInsertL1) + +val orcTblStatement2 = +s""" + |CREATE EXTERNAL TABLE orc_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(orcTblStatement2) + +val orcTblInsertL2 = + s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin +sql(orcTblInsertL2) + +val orcTblStatement3 = +s""" + |CREATE EXTERNAL TABLE orc_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin +sql(orcTblStatement3) + +val orcTblInsertL3 = + s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin +sql(orcTblInsertL3) + +withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + val topDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION 
'${s"${dir.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + val topDirSqlStatement = s"SELECT * FROM tbl1" + if (convertMetastore) { +checkAnswer(sql(topDirSqlStatement), Nil) + } else { +checkAnswer(sql(topDirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l1DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + val l1DirSqlStatement = s"SELECT * FROM tbl2" + if (convertMetastore) { +checkAnswer(sql(l1DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l1DirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l2DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(l2DirStatement) + val l2DirSqlStatement = s"SELECT * FROM tbl3" + if (convertMetastore) { +checkAnswer(sql(l2DirSqlStatement), + (3 to 4).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l2DirSqlStatement), + (3 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val wildcardTopDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin + sql(wildcardTopDirStatement) + val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4" + if (convertMetastore)
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r368083648 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,166 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +val orcTblStatement1 = + s""" + |CREATE EXTERNAL TABLE orc_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin +sql(orcTblStatement1) + +val orcTblInsertL1 = + s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin +sql(orcTblInsertL1) + +val orcTblStatement2 = +s""" + |CREATE EXTERNAL TABLE orc_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(orcTblStatement2) + +val orcTblInsertL2 = + s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin +sql(orcTblInsertL2) + +val orcTblStatement3 = +s""" + |CREATE EXTERNAL TABLE orc_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin +sql(orcTblStatement3) + +val orcTblInsertL3 = + s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin +sql(orcTblInsertL3) + +withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + val topDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION 
'${s"${dir.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + val topDirSqlStatement = s"SELECT * FROM tbl1" + if (convertMetastore) { +checkAnswer(sql(topDirSqlStatement), Nil) + } else { +checkAnswer(sql(topDirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l1DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + val l1DirSqlStatement = s"SELECT * FROM tbl2" + if (convertMetastore) { +checkAnswer(sql(l1DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l1DirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l2DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(l2DirStatement) + val l2DirSqlStatement = s"SELECT * FROM tbl3" + if (convertMetastore) { +checkAnswer(sql(l2DirSqlStatement), + (3 to 4).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l2DirSqlStatement), + (3 to 6).map(i => Row(i, i, s"orc$i"))) Review comment: This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail:
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r368083468 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,166 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +val orcTblStatement1 = + s""" + |CREATE EXTERNAL TABLE orc_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin +sql(orcTblStatement1) + +val orcTblInsertL1 = + s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin +sql(orcTblInsertL1) + +val orcTblStatement2 = +s""" + |CREATE EXTERNAL TABLE orc_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(orcTblStatement2) + +val orcTblInsertL2 = + s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin +sql(orcTblInsertL2) + +val orcTblStatement3 = +s""" + |CREATE EXTERNAL TABLE orc_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin +sql(orcTblStatement3) + +val orcTblInsertL3 = + s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin +sql(orcTblInsertL3) + +withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + val topDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION 
'${s"${dir.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + val topDirSqlStatement = s"SELECT * FROM tbl1" + if (convertMetastore) { +checkAnswer(sql(topDirSqlStatement), Nil) + } else { +checkAnswer(sql(topDirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l1DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + val l1DirSqlStatement = s"SELECT * FROM tbl2" + if (convertMetastore) { +checkAnswer(sql(l1DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) Review comment: sure This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r368083514 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,166 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +val orcTblStatement1 = + s""" + |CREATE EXTERNAL TABLE orc_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin +sql(orcTblStatement1) + +val orcTblInsertL1 = + s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin +sql(orcTblInsertL1) + +val orcTblStatement2 = +s""" + |CREATE EXTERNAL TABLE orc_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(orcTblStatement2) + +val orcTblInsertL2 = + s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin +sql(orcTblInsertL2) + +val orcTblStatement3 = +s""" + |CREATE EXTERNAL TABLE orc_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin +sql(orcTblStatement3) + +val orcTblInsertL3 = + s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin +sql(orcTblInsertL3) + +withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + val topDirStatement = +s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION 
'${s"${dir.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + val topDirSqlStatement = s"SELECT * FROM tbl1" + if (convertMetastore) { +checkAnswer(sql(topDirSqlStatement), Nil) + } else { +checkAnswer(sql(topDirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l1DirStatement = +s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + val l1DirSqlStatement = s"SELECT * FROM tbl2" + if (convertMetastore) { +checkAnswer(sql(l1DirSqlStatement), + (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { +checkAnswer(sql(l1DirSqlStatement), + (1 to 6).map(i => Row(i, i, s"orc$i"))) Review comment: done This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r365561742 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,156 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +sql("USE default") +sql( + """ +|CREATE EXTERNAL TABLE hive_orc( Review comment: @dongjoon-hyun Thanks for pointing out this. I was using other test cases without thinking too much. I have changed the name. I also replaced the hiveClient.runSqlHive for the insert stmt. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r365352490 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,127 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl1"), Nil) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show() + }.getMessage +assert(msg.contains("Not a file:")) +} + +val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin +sql(l1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) +} else { + val msg = intercept[IOException] { +sql("SELECT * 
FROM tbl2").show() + }.getMessage + assert(msg.contains("Not a file:")) +} + +val l2DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(l2DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl3"), +(3 to 4).map(i => Row(i, i, s"parq$i"))) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl3").show() Review comment: moved to next line. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r365352648 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,127 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl1"), Nil) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show() + }.getMessage +assert(msg.contains("Not a file:")) +} + +val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin +sql(l1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) +} else { + val msg = intercept[IOException] { +sql("SELECT * 
FROM tbl2").show() + }.getMessage + assert(msg.contains("Not a file:")) +} + +val l2DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(l2DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl3"), +(3 to 4).map(i => Row(i, i, s"parq$i"))) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl3").show() + }.getMessage + assert(msg.contains("Not a file:")) +} + +val wildcardTopDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${new File(s"${path}/*").toURI}'""".stripMargin +sql(wildcardTopDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl4"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) +} else { + val msg = intercept[IOException] { +sql("SELECT * FROM tbl4").show() + }.getMessage + assert(msg.contains("Not a file:")) +} + +val wildcardL1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl5( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${new File(s"${path}/l1/*").toURI}'""".stripMargin +sql(wildcardL1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl5"), +(1 to 4).map(i => Row(i, i, s"parq$i"))) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl5").show() Review comment: This is an automated message from the Apache Git Service. To respond to the message, please log on to
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r365350731 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,127 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl1"), Nil) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show() + }.getMessage +assert(msg.contains("Not a file:")) +} + +val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin +sql(l1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) Review comment: This is an automated message from the Apache 
Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r365348165 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,127 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl1"), Nil) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show() + }.getMessage +assert(msg.contains("Not a file:")) Review comment: This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r365347814 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,127 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl1"), Nil) +} else { + val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show() Review comment: thanks, changed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r364994517 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,115 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl1"), Nil) +} else { + intercept[IOException](sql("select * from tbl1").show()) +} + +val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin +sql(l1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) +} else { + intercept[IOException](sql("select * from tbl2").show()) +} + +val l2DirStatement = + s""" + |CREATE EXTERNAL 
TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(l2DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl3"), +(3 to 4).map(i => Row(i, i, s"parq$i"))) +} else { + intercept[IOException](sql("select * from tbl3").show()) Review comment: added the checking This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r364994378 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,115 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl1"), Nil) +} else { + intercept[IOException](sql("select * from tbl1").show()) +} + +val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin +sql(l1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) +} else { + intercept[IOException](sql("select * from tbl2").show()) Review comment: the whole exception message is `Not a 
file: file:/Users/qianyangyu/IdeaProjects/spark/target/tmp/spark-abc8c1ad-4a3a-420f-b4fc-58d995be9bb0/l1`, I will check the first part `Not a file:`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r364994450 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,115 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl1"), Nil) +} else { + intercept[IOException](sql("select * from tbl1").show()) +} + +val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin +sql(l1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl2"), +(1 to 2).map(i => Row(i, i, s"parq$i"))) +} else { + intercept[IOException](sql("select * from tbl2").show()) +} + +val l2DirStatement = + s""" + |CREATE EXTERNAL 
TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin +sql(l2DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl3"), Review comment: Changed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r364993517 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala ## @@ -222,4 +223,115 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { +withTempPath { path => + withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { +val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). + toDF("c1", "c2", "c3").repartition(1) +val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")). + toDF("c1", "c2", "c3").repartition(1) +val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")). + toDF("c1", "c2", "c3").repartition(1) +someDF1.write.parquet(s"${path.getCanonicalPath}/l1/") +someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/") +someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/") + +val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin +sql(topDirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl1"), Nil) +} else { + intercept[IOException](sql("select * from tbl1").show()) +} + +val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin +sql(l1DirStatement) +if (parquetConversion == "true") { + checkAnswer(sql("select * from tbl2"), Review comment: changed This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories
kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories URL: https://github.com/apache/spark/pull/27130#discussion_r364952942 ## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala ## @@ -170,4 +170,155 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { +Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { +withTempDir { dir => + try { +hiveClient.runSqlHive("USE default") +hiveClient.runSqlHive( Review comment: sure, I will change to sql. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org