[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-17 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r368087150
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
 @@ -170,4 +170,166 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
   test("SPARK-11412 read and merge orc schemas in parallel") {
     testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel)
   }
+
+  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
+    Seq(true, false).foreach { convertMetastore =>
+      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
+        withTempDir { dir =>
+          try {
+            val orcTblStatement1 =
+              s"""
+                 |CREATE EXTERNAL TABLE orc_tbl1(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS orc
+                 |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin
+            sql(orcTblStatement1)
+
+            val orcTblInsertL1 =
+              s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')"
+            sql(orcTblInsertL1)
+
+            val orcTblStatement2 =
+              s"""
+                 |CREATE EXTERNAL TABLE orc_tbl2(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS orc
+                 |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin
+            sql(orcTblStatement2)
+
+            val orcTblInsertL2 =
+              s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')"
+            sql(orcTblInsertL2)
+
+            val orcTblStatement3 =
+              s"""
+                 |CREATE EXTERNAL TABLE orc_tbl3(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS orc
+                 |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin
+            sql(orcTblStatement3)
+
+            val orcTblInsertL3 =
+              s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')"
+            sql(orcTblInsertL3)
+
+            withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
+              val topDirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl1(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${s"${dir.getCanonicalPath}"}'""".stripMargin
+              sql(topDirStatement)
+              val topDirSqlStatement = s"SELECT * FROM tbl1"
+              if (convertMetastore) {
+                checkAnswer(sql(topDirSqlStatement), Nil)
+              } else {
+                checkAnswer(sql(topDirSqlStatement),
+                  (1 to 6).map(i => Row(i, i, s"orc$i")))
+              }
+
+              val l1DirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl2(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin
+              sql(l1DirStatement)
+              val l1DirSqlStatement = s"SELECT * FROM tbl2"
+              if (convertMetastore) {
+                checkAnswer(sql(l1DirSqlStatement),
+                  (1 to 2).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(l1DirSqlStatement),
+                  (1 to 6).map(i => Row(i, i, s"orc$i")))
+              }
+
+              val l2DirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl3(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin
+              sql(l2DirStatement)
+              val l2DirSqlStatement = s"SELECT * FROM tbl3"
+              if (convertMetastore) {
+                checkAnswer(sql(l2DirSqlStatement),
+                  (3 to 4).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(l2DirSqlStatement),
+                  (3 to 6).map(i => Row(i, i, s"orc$i")))
+              }
+
+              val wildcardTopDirStatement =
+                s"""
+                   |CREATE EXTERNAL TABLE tbl4(
+                   |  c1 int,
+                   |  c2 int,
+                   |  c3 string)
+                   |STORED AS orc
+                   |LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin
+              sql(wildcardTopDirStatement)
+              val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4"
+              if (convertMetastore) 

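The expected answers in the hunk above encode the behavior under test: with
spark.sql.hive.convertMetastoreOrc enabled, Spark's native ORC reader lists
only the files directly under a table's LOCATION, so tbl1 (rooted at the
top-level temp directory, whose data lives only in nested subdirectories)
comes back empty, while with the conversion disabled the Hive SerDe read path
also returns the rows written into l1/, l1/l2/ and l1/l2/l3/. A minimal sketch
of the two expectations, assuming the suite's sql/checkAnswer helpers and the
directory layout built above:

    // Illustrative only: the same table read through the two code paths.
    withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
      // Native ORC reader: no files directly under the location, empty result.
      checkAnswer(sql("SELECT * FROM tbl1"), Nil)
    }
    withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") {
      // Hive SerDe path: rows in nested subdirectories are visible.
      checkAnswer(sql("SELECT * FROM tbl1"),
        (1 to 6).map(i => Row(i, i, s"orc$i")))
    }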
[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-17 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r368083946
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
+              val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4"
+              if (convertMetastore) 

[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-17 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r368083822
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
+              val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4"
+              if (convertMetastore) 

[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-17 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r368083648
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
+              if (convertMetastore) {
+                checkAnswer(sql(l2DirSqlStatement),
+                  (3 to 4).map(i => Row(i, i, s"orc$i")))
+              } else {
+                checkAnswer(sql(l2DirSqlStatement),
+                  (3 to 6).map(i => Row(i, i, s"orc$i")))
 
 Review comment:
   



[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-17 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r368083468
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
+              val l1DirSqlStatement = s"SELECT * FROM tbl2"
+              if (convertMetastore) {
+                checkAnswer(sql(l1DirSqlStatement),
+                  (1 to 2).map(i => Row(i, i, s"orc$i")))
 
 Review comment:
   sure





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-17 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r368083514
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
+              } else {
+                checkAnswer(sql(l1DirSqlStatement),
+                  (1 to 6).map(i => Row(i, i, s"orc$i")))
 
 Review comment:
   done





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-11 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r365561742
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
 @@ -170,4 +170,156 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
   test("SPARK-11412 read and merge orc schemas in parallel") {
     testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel)
   }
+
+  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
+    Seq(true, false).foreach { convertMetastore =>
+      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
+        withTempDir { dir =>
+          try {
+            sql("USE default")
+            sql(
+              """
+                |CREATE EXTERNAL TABLE hive_orc(
 
 Review comment:
   @dongjoon-hyun Thanks for pointing this out. I was following the other test
   cases without thinking about it too much; I have changed the name. I also
   replaced hiveClient.runSqlHive with sql() for the INSERT statement.
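
For reference, a minimal sketch of the revised pattern (the exact cleanup
mechanism in the merged PR may differ; this assumes the suite's
withTempDir { dir => ... } scope):

    // Sketch: a test-specific table name, created and populated via sql()
    // instead of hiveClient.runSqlHive; withTable drops the table even if an
    // assertion fails, avoiding clashes with names reused by other tests.
    withTable("orc_tbl1") {
      sql(
        s"""
           |CREATE EXTERNAL TABLE orc_tbl1(c1 int, c2 int, c3 string)
           |STORED AS orc
           |LOCATION '${dir.getCanonicalPath}/l1/'""".stripMargin)
      sql("INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')")
    }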





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-10 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r365352490
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
 @@ -222,4 +223,127 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
       assert(df4.columns === Array("str", "max_int"))
     }
   }
+
+  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
+    Seq("true", "false").foreach { parquetConversion =>
+      withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
+        withTempPath { path =>
+          withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
+            val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
+              toDF("c1", "c2", "c3").repartition(1)
+            val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")).
+              toDF("c1", "c2", "c3").repartition(1)
+            val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")).
+              toDF("c1", "c2", "c3").repartition(1)
+            someDF1.write.parquet(s"${path.getCanonicalPath}/l1/")
+            someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/")
+            someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/")
+
+            val topDirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl1(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin
+            sql(topDirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("SELECT * FROM tbl1"), Nil)
+            } else {
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
+            }
+
+            val l1DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl2(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin
+            sql(l1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("SELECT * FROM tbl2"),
+                (1 to 2).map(i => Row(i, i, s"parq$i")))
+            } else {
+              val msg = intercept[IOException] {
+                sql("SELECT * FROM tbl2").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
+            }
+
+            val l2DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl3(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin
+            sql(l2DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("SELECT * FROM tbl3"),
+                (3 to 4).map(i => Row(i, i, s"parq$i")))
+            } else {
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl3").show()
 
 Review comment:
   moved to next line.
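
For reference, the layout the comment refers to, with the intercept block
opened on its own line (matching the tbl2 branch in the hunk above):

    val msg = intercept[IOException] {
      sql("SELECT * FROM tbl1").show()
    }.getMessage
    assert(msg.contains("Not a file:"))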





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-10 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r365352648
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
+            val wildcardTopDirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl4(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${new File(s"${path}/*").toURI}'""".stripMargin
+            sql(wildcardTopDirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("SELECT * FROM tbl4"),
+                (1 to 2).map(i => Row(i, i, s"parq$i")))
+            } else {
+              val msg = intercept[IOException] {
+                sql("SELECT * FROM tbl4").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
+            }
+
+            val wildcardL1DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl5(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${new File(s"${path}/l1/*").toURI}'""".stripMargin
+            sql(wildcardL1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("SELECT * FROM tbl5"),
+                (1 to 4).map(i => Row(i, i, s"parq$i")))
+            } else {
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl5").show()
 
 Review comment:
   



[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-10 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r365350731
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
+            sql(l1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("SELECT * FROM tbl2"),
+                (1 to 2).map(i => Row(i, i, s"parq$i")))
 
 Review comment:
   





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-10 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r365348165
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
+            } else {
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show()
+              }.getMessage
+              assert(msg.contains("Not a file:"))
 
 Review comment:
   





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-10 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r365347814
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
+            sql(topDirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("SELECT * FROM tbl1"), Nil)
+            } else {
+              val msg = intercept[IOException] {sql("SELECT * FROM tbl1").show()
 
 Review comment:
   thanks, changed.





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-09 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r364994517
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
 @@ -222,4 +223,115 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
       assert(df4.columns === Array("str", "max_int"))
     }
   }
+
+  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
+    Seq("true", "false").foreach { parquetConversion =>
+      withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
+        withTempPath { path =>
+          withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
+            val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
+              toDF("c1", "c2", "c3").repartition(1)
+            val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")).
+              toDF("c1", "c2", "c3").repartition(1)
+            val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")).
+              toDF("c1", "c2", "c3").repartition(1)
+            someDF1.write.parquet(s"${path.getCanonicalPath}/l1/")
+            someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/")
+            someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/")
+
+            val topDirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl1(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin
+            sql(topDirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl1"), Nil)
+            } else {
+              intercept[IOException](sql("select * from tbl1").show())
+            }
+
+            val l1DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl2(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin
+            sql(l1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl2"),
+                (1 to 2).map(i => Row(i, i, s"parq$i")))
+            } else {
+              intercept[IOException](sql("select * from tbl2").show())
+            }
+
+            val l2DirStatement =
+              s"""
+                 |CREATE EXTERNAL TABLE tbl3(
+                 |  c1 int,
+                 |  c2 int,
+                 |  c3 string)
+                 |STORED AS parquet
+                 |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin
+            sql(l2DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl3"),
+                (3 to 4).map(i => Row(i, i, s"parq$i")))
+            } else {
+              intercept[IOException](sql("select * from tbl3").show())
 
 Review comment:
   added the checking
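
A before/after sketch of that change, as reflected in the two revisions quoted
in this thread (assuming java.io.IOException is imported in the suite):

    // Before: only the exception type was verified.
    intercept[IOException](sql("select * from tbl3").show())

    // After: the message is captured too, asserting on its stable
    // "Not a file:" prefix because the temp-directory suffix varies per run.
    val msg = intercept[IOException] {
      sql("select * from tbl3").show()
    }.getMessage
    assert(msg.contains("Not a file:"))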





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-09 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r364994378
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
+            sql(l1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl2"),
+                (1 to 2).map(i => Row(i, i, s"parq$i")))
+            } else {
+              intercept[IOException](sql("select * from tbl2").show())
 
 Review comment:
   The whole exception message is
   `Not a file: file:/Users/qianyangyu/IdeaProjects/spark/target/tmp/spark-abc8c1ad-4a3a-420f-b4fc-58d995be9bb0/l1`,
   so I will check only the stable first part, `Not a file:`.





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-09 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r364994450
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
+            sql(l2DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl3"),
 
 Review comment:
   Changed.





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-09 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r364993517
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
 ##
+            sql(l1DirStatement)
+            if (parquetConversion == "true") {
+              checkAnswer(sql("select * from tbl2"),
 
 Review comment:
   changed





[GitHub] [spark] kevinyu98 commented on a change in pull request #27130: [SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories

2020-01-09 Thread GitBox
kevinyu98 commented on a change in pull request #27130: 
[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with 
subdirectories
URL: https://github.com/apache/spark/pull/27130#discussion_r364952942
 
 

 ##
 File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 ##
 @@ -170,4 +170,155 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
   test("SPARK-11412 read and merge orc schemas in parallel") {
     testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel)
   }
+
+  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
+    Seq(true, false).foreach { convertMetastore =>
+      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
+        withTempDir { dir =>
+          try {
+            hiveClient.runSqlHive("USE default")
+            hiveClient.runSqlHive(
 
 Review comment:
   sure, I will change to sql.
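
A before/after sketch of that change (table name and values are illustrative):

    // Before: the statement goes straight to Hive, bypassing Spark's parser
    // and analyzer and any settings applied through withSQLConf.
    hiveClient.runSqlHive("INSERT INTO TABLE hive_orc VALUES (1, 1, 'orc1')")

    // After: the same statement through Spark's own SQL entry point.
    sql("INSERT INTO TABLE hive_orc VALUES (1, 1, 'orc1')")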

