[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-07 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r891887309


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +571,103 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog(catalogName).asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+  assert(table.properties().get("external").equals("true"))
+  assert(table.properties().get("location").equals("file:" + 
dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test managed table"
+
+  spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map.empty[String, String])
+
+  val tableName2 = "my_table2"
+  val description2 = "this is a test external table"
+
+  spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName2).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description2,
+options = Map("path" -> dir.getAbsolutePath))
+
+  val tables = spark.catalog.listTables("testcat.my_db").collect()
+  assert(tables.size == 2)
+
+  val expectedTable1 =
+new Table(tableName, catalogName, Array(dbName), description,
+  CatalogTableType.MANAGED.name, false)
+  assert(tables.exists(t =>
+expectedTable1.name.equals(t.name) && 
expectedTable1.database.equals(t.database) &&
+expectedTable1.description.equals(t.description) &&
+expectedTable1.tableType.equals(t.tableType) &&
+expectedTable1.isTemporary == t.isTemporary))
+
+  val expectedTable2 =
+new Table(tableName2, catalogName, Array(dbName), description2,
+  CatalogTableType.EXTERNAL.name, false)
+  assert(tables.exists(t =>
+expectedTable2.name.equals(t.name) && 
expectedTable2.database.equals(t.database) &&
+expectedTable2.description.equals(t.description) &&
+expectedTable2.tableType.equals(t.tableType) &&
+expectedTable2.isTemporary == t.isTemporary))
+}

Review Comment:
   Sure I will add this test case.
   
   I won't call this is to test `backward compatibility` though. `listTable` 
should always accept DB name: either it is a `database` or it is a 
`catalog_name.database`.
   
   This is more like to test correctness. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-07 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r891563533


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -117,14 +128,44 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   case NonFatal(_) => None
 }
 val isTemp = sessionCatalog.isTempView(tableIdent)
+val qualifier =
+  
Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull)
 new Table(
   name = tableIdent.table,

Review Comment:
   done



##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -117,14 +128,44 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   case NonFatal(_) => None
 }
 val isTemp = sessionCatalog.isTempView(tableIdent)
+val qualifier =

Review Comment:
   hm updated.



##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -97,8 +98,18 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
*/
   @throws[AnalysisException]("database does not exist")
   override def listTables(dbName: String): Dataset[Table] = {
-val tables = sessionCatalog.listTables(dbName).map(makeTable)
-CatalogImpl.makeDataset(tables, sparkSession)
+if (sessionCatalog.databaseExists(dbName)) {

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-07 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r891537461


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+spark.conf.set("spark.sql.catalog.testcat", 
classOf[InMemoryCatalog].getName)
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {

Review Comment:
   Actually we still need the change in `listTables`: 
   
   existing users do not use 3-part name and they just use 2 part name. In the 
case of the `listTables`, existing users use `dbname` directly (so just `b` but 
not `a.b`). In this case, there is a choice to decide which catalog it is, and 
the default catalog is hive metastore.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-07 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r891534128


##
sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala:
##
@@ -64,15 +65,34 @@ class Database(
 @Stable
 class Table(
 val name: String,
-@Nullable val database: String,
+@Nullable val catalog: String,
+@Nullable val namespace: Array[String],
 @Nullable val description: String,
 val tableType: String,
 val isTemporary: Boolean)
   extends DefinedByConstructorParams {
 
+  def this(name: String, database: String, description: String, tableType: 
String,
+isTemporary: Boolean) = {
+this(name, null, Array(database), description, tableType, isTemporary)
+  }
+
+  def database: String = {
+if (namespace == null) {
+  null
+} else if (namespace.length == 2) {

Review Comment:
   Right I think we only care about array.length = 1 which is the database.



##
sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala:
##
@@ -64,15 +65,34 @@ class Database(
 @Stable
 class Table(
 val name: String,
-@Nullable val database: String,
+@Nullable val catalog: String,
+@Nullable val namespace: Array[String],
 @Nullable val description: String,
 val tableType: String,
 val isTemporary: Boolean)
   extends DefinedByConstructorParams {
 
+  def this(name: String, database: String, description: String, tableType: 
String,
+isTemporary: Boolean) = {
+this(name, null, Array(database), description, tableType, isTemporary)
+  }
+
+  def database: String = {
+if (namespace == null) {
+  null
+} else if (namespace.length == 2) {

Review Comment:
   Right I think we only care about array.length = 1 which is the database. 
Will remove it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-07 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r891497676


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+spark.conf.set("spark.sql.catalog.testcat", 
classOf[InMemoryCatalog].getName)
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {

Review Comment:
   yeah if you agree I will remove unnecessary change.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-03 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r889237990


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+spark.conf.set("spark.sql.catalog.testcat", 
classOf[InMemoryCatalog].getName)
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {

Review Comment:
   It turns out our code does not support `a.b` as table/database name:
   
   ```
   `also_table_name.my_db` is not a valid name for tables/databases. Valid 
names only contain alphabet characters, numbers and _.
   org.apache.spark.sql.AnalysisException: `also_table_name.my_db` is not a 
valid name for tables/databases. Valid names only contain alphabet characters, 
numbers and _.
at 
org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350)
at 
org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63)
at 
org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677)
   ```
   
   In this case, we won't hit the ambiguity that a database name `a.b` could be 
either a database name or `a` is catalog name for `ListTables` API?
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-03 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r889237990


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+spark.conf.set("spark.sql.catalog.testcat", 
classOf[InMemoryCatalog].getName)
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {

Review Comment:
   It turns out our code does not support `a.b` as table/database name:
   
   ```
   `also_name.my_db` is not a valid name for tables/databases. Valid names only 
contain alphabet characters, numbers and _.
   org.apache.spark.sql.AnalysisException: `also_name.my_db` is not a valid 
name for tables/databases. Valid names only contain alphabet characters, 
numbers and _.
at 
org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350)
at 
org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63)
at 
org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677)
   ```
   
   In this case, we won't hit the ambiguity that a database name `a.b` could be 
either a database name or `a` is catalog name for `ListTables` API?
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-03 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r889237990


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+spark.conf.set("spark.sql.catalog.testcat", 
classOf[InMemoryCatalog].getName)
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {

Review Comment:
   It turns out our code does not support `a.b` as table/database name:
   
   ```
   `also_table_name.my_table1` is not a valid name for tables/databases. Valid 
names only contain alphabet characters, numbers and _.
   org.apache.spark.sql.AnalysisException: `also_table_name.my_table1` is not a 
valid name for tables/databases. Valid names only contain alphabet characters, 
numbers and _.
at 
org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350)
at 
org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63)
at 
org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677)
   ```
   
   In this case, we won't hit the ambiguity that a database name `a.b` could be 
either a database name or `a` is catalog name for `ListTables` API?
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-03 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r889237990


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+spark.conf.set("spark.sql.catalog.testcat", 
classOf[InMemoryCatalog].getName)
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {

Review Comment:
   It turns out our code does not support `a.b` as table/database name:
   
   ```
   `also_table_name.my_table1` is not a valid name for tables/databases. Valid 
names only contain alphabet characters, numbers and _.
   org.apache.spark.sql.AnalysisException: `also_table_name.my_table1` is not a 
valid name for tables/databases. Valid names only contain alphabet characters, 
numbers and _.
at 
org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350)
at 
org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63)
at 
org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677)
   ```
   
   In this case, we won't hit the ambiguity that a database name `a.b` could be 
either a database name or `a` is catalog name for `ListTables` API?
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-03 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r889237990


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 }.getMessage
 assert(errMsg.contains("my_temp_table is a temp view. 
'recoverPartitions()' expects a table"))
   }
+
+  test("three layer namespace compatibility - create managed table") {
+spark.conf.set("spark.sql.catalog.testcat", 
classOf[InMemoryCatalog].getName)
+val catalogName = "testcat"
+val dbName = "my_db"
+val tableName = "my_table"
+val tableSchema = new StructType().add("i", "int")
+val description = "this is a test table"
+
+val df = spark.catalog.createTable(
+  tableName = Array(catalogName, dbName, tableName).mkString("."),
+  source = classOf[FakeV2Provider].getName,
+  schema = tableSchema,
+  description = description,
+  options = Map.empty[String, String])
+assert(df.schema.equals(tableSchema))
+
+val testCatalog =
+  spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName))
+assert(table.schema().equals(tableSchema))
+
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+assert(table.properties().get("comment").equals(description))
+  }
+
+  test("three layer namespace compatibility - create external table") {
+withTempDir { dir =>
+  val catalogName = "testcat"
+  val dbName = "my_db"
+  val tableName = "my_table"
+  val tableSchema = new StructType().add("i", "int")
+  val description = "this is a test table"
+
+  val df = spark.catalog.createTable(
+tableName = Array(catalogName, dbName, tableName).mkString("."),
+source = classOf[FakeV2Provider].getName,
+schema = tableSchema,
+description = description,
+options = Map("path" -> dir.getAbsolutePath))
+  assert(df.schema.equals(tableSchema))
+
+  val testCatalog =
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog
+  val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName))
+  assert(table.schema().equals(tableSchema))
+  
assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName))
+  assert(table.properties().get("comment").equals(description))
+  assert(table.properties().get("path").equals(dir.getAbsolutePath))
+}
+  }
+
+  test("three layer namespace compatibility - list tables") {

Review Comment:
   It turns out our code does not support `a.b` as table name:
   
   ```
   `also_table_name.my_table1` is not a valid name for tables/databases. Valid 
names only contain alphabet characters, numbers and _.
   org.apache.spark.sql.AnalysisException: `also_table_name.my_table1` is not a 
valid name for tables/databases. Valid names only contain alphabet characters, 
numbers and _.
at 
org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146)
at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350)
at 
org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63)
at 
org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-02 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r888383953


##
sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala:
##
@@ -299,15 +313,18 @@ class CatalogSuite extends SharedSparkSession with 
AnalysisTest {
 val functionFields = 
ScalaReflection.getConstructorParameterValues(function)
 val columnFields = ScalaReflection.getConstructorParameterValues(column)
 assert(dbFields == Seq("nama", "descripta", "locata"))
-assert(tableFields == Seq("nama", "databasa", "descripta", "typa", false))
+assert(Seq(tableFields.apply(0), tableFields.apply(2), 
tableFields.apply(3),

Review Comment:
   I didn't do this because the 3rd parameter is the namespace as an Array, 
so `==` does not do a deep compare. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-06-02 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r888374320


##
sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala:
##
@@ -64,12 +64,26 @@ class Database(
 @Stable
 class Table(
 val name: String,
-@Nullable val database: String,
+@Nullable val qualifier: Array[String],
 @Nullable val description: String,
 val tableType: String,
 val isTemporary: Boolean)
   extends DefinedByConstructorParams {
 
+  def database: String = parseQualifier
+
+  def parseQualifier: String = {
+if (qualifier == null) {
+  null
+} else if (qualifier.length == 2) {
+  qualifier(1)
+} else if (qualifier.length == 1) {

Review Comment:
   for example if a table is `catalog1.db1.table1`,
   
   In this case `catalog_name=catalog1, qualifier=[db1], table_name=table1`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-24 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r881112118


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -117,14 +128,42 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   case NonFatal(_) => None
 }
 val isTemp = sessionCatalog.isTempView(tableIdent)
+val qualifier =
+  
Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull)

Review Comment:
   Will use `:String` to solve this issue.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-24 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r880816481


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -117,14 +128,42 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   case NonFatal(_) => None
 }
 val isTemp = sessionCatalog.isTempView(tableIdent)
+val qualifier =
+  
Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull)

Review Comment:
   ```
   /**
* Identifies a table in a database.
* If `database` is not defined, the current database is used.
* When we register a permanent function in the FunctionRegistry, we use
* unquotedString as the function name.
*/
   case class TableIdentifier(table: String, database: Option[String])
 extends IdentifierWithDatabase {
   
 override val identifier: String = table
   
 def this(table: String) = this(table, None)
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-24 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r880815873


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -117,14 +128,42 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   case NonFatal(_) => None
 }
 val isTemp = sessionCatalog.isTempView(tableIdent)
+val qualifier =
+  
Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull)

Review Comment:
   This is `makeTable(tableIdent: TableIdentifier)`, and the `TableIdentifier` 
contains no catalog name. Are you suggesting that I use `getCurrentCatalog` 
(it does not exist yet, but we can add one) to get the catalog name?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-23 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r879816903


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   isTemporary = isTemp)
   }
 
+  private def makeTable(ident: Seq[String]): Table = {
+val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true)
+val node = sparkSession.sessionState.executePlan(plan).analyzed
+node match {
+  case t: ResolvedTable =>
+val isExternal = t.table.properties().getOrDefault("external", 
"false").equals("true")
+new Table(
+  name = t.identifier.name(),
+  database = t.identifier.namespace().head,
+  description = t.table.properties().get("comment"),
+  tableType =
+if (isExternal) CatalogTableType.EXTERNAL.name
+else CatalogTableType.MANAGED.name,
+  isTemporary = false)
+  case v: ResolvedView =>
+new Table(

Review Comment:
   BTW this idea triggered a compatibility check issue
   ```
   spark-sql: Failed binary compatibility check against 
org.apache.spark:spark-sql_2.12:3.2.0! Found 1 potential problems (filtered 586)
   [error]  * method 
this(java.lang.String,java.lang.String,java.lang.String,java.lang.String,Boolean)Unit
 in class org.apache.spark.sql.catalog.Table's type is different in current 
version, where it is 
(java.lang.String,Array[java.lang.String],java.lang.String,java.lang.String,Boolean)Unit
 instead of 
(java.lang.String,java.lang.String,java.lang.String,java.lang.String,Boolean)Unit
   ```
   How do we usually solve such issues?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-23 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r879739463


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   isTemporary = isTemp)
   }
 
+  private def makeTable(ident: Seq[String]): Table = {
+val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true)
+val node = sparkSession.sessionState.executePlan(plan).analyzed
+node match {
+  case t: ResolvedTable =>
+val isExternal = t.table.properties().getOrDefault("external", 
"false").equals("true")
+new Table(
+  name = t.identifier.name(),
+  database = t.identifier.namespace().head,
+  description = t.table.properties().get("comment"),
+  tableType =
+if (isExternal) CatalogTableType.EXTERNAL.name
+else CatalogTableType.MANAGED.name,
+  isTemporary = false)
+  case v: ResolvedView =>
+new Table(

Review Comment:
   pushed a new commit. Can you take a look?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-20 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r878542464


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   isTemporary = isTemp)
   }
 
+  private def makeTable(ident: Seq[String]): Table = {
+val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true)
+val node = sparkSession.sessionState.executePlan(plan).analyzed
+node match {
+  case t: ResolvedTable =>
+val isExternal = t.table.properties().getOrDefault("external", 
"false").equals("true")
+new Table(
+  name = t.identifier.name(),
+  database = t.identifier.namespace().head,
+  description = t.table.properties().get("comment"),
+  tableType =
+if (isExternal) CatalogTableType.EXTERNAL.name
+else CatalogTableType.MANAGED.name,
+  isTemporary = false)
+  case v: ResolvedView =>
+new Table(

Review Comment:
   Why is `database` null if `qualifier.length` != 2?
   
   `qualifier` could contain only one string which is a database name? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-20 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r878455439


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   isTemporary = isTemp)
   }
 
+  private def makeTable(ident: Seq[String]): Table = {
+val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true)
+val node = sparkSession.sessionState.executePlan(plan).analyzed
+node match {
+  case t: ResolvedTable =>
+val isExternal = t.table.properties().getOrDefault("external", 
"false").equals("true")
+new Table(
+  name = t.identifier.name(),
+  database = t.identifier.namespace().head,
+  description = t.table.properties().get("comment"),
+  tableType =
+if (isExternal) CatalogTableType.EXTERNAL.name
+else CatalogTableType.MANAGED.name,
+  isTemporary = false)
+  case v: ResolvedView =>
+new Table(
+  name = v.identifier.name(),
+  database = v.identifier.namespace().toString,
+  description = "",
+  tableType = "",

Review Comment:
   Good catch!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-20 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r878354239


##
sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala:
##
@@ -78,7 +78,7 @@ class Table(
   s"tableType='$tableType', " +
   s"isTemporary='$isTemporary']"
   }
-
+  

Review Comment:
   I will revert this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-19 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r877754283


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -97,8 +97,18 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
*/
   @throws[AnalysisException]("database does not exist")
   override def listTables(dbName: String): Dataset[Table] = {
-val tables = sessionCatalog.listTables(dbName).map(makeTable)
-CatalogImpl.makeDataset(tables, sparkSession)
+if (sessionCatalog.databaseExists(dbName)) {
+  val tables = sessionCatalog.listTables(dbName).map(makeTable)
+  CatalogImpl.makeDataset(tables, sparkSession)
+} else {
+  val multiParts = 
sparkSession.sessionState.sqlParser.parseMultipartIdentifier(dbName)
+  val plan = ShowTables(UnresolvedNamespace(multiParts), None)
+  val ret = sparkSession.sessionState.executePlan(plan).toRdd.collect()
+  val tables = ret
+.map(row => TableIdentifier(row.getString(1), Some(row.getString(0
+.map(makeTable)

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-19 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r877706031


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -367,24 +377,40 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
   schema: StructType,
   description: String,
   options: Map[String, String]): DataFrame = {
-val tableIdent = 
sparkSession.sessionState.sqlParser.parseTableIdentifier(tableName)
+val idents = 
sparkSession.sessionState.sqlParser.parseMultipartIdentifier(tableName)
 val storage = DataSource.buildStorageFormatFromOptions(options)
 val tableType = if (storage.locationUri.isDefined) {
   CatalogTableType.EXTERNAL
 } else {
   CatalogTableType.MANAGED
 }
-val tableDesc = CatalogTable(
-  identifier = tableIdent,
-  tableType = tableType,
-  storage = storage,
-  schema = schema,
-  provider = Some(source),
-  comment = { if (description.isEmpty) None else Some(description) }
-)
-val plan = CreateTable(tableDesc, SaveMode.ErrorIfExists, None)
+val location = if (storage.locationUri.isDefined) {
+  val locationStr = storage.locationUri.get.toString
+  Some(locationStr)
+} else {
+  None
+}
+
+val tableSpec =
+  TableSpec(
+properties = Map(),
+provider = Some(source),
+options = options,
+location = location,
+comment = { if (description.isEmpty) None else Some(description) },
+serde = None,
+external = tableType == CatalogTableType.EXTERNAL)
+
+val plan =
+  CreateTable(
+name = UnresolvedDBObjectName(idents, isNamespace = true),

Review Comment:
   oops yes it should be false.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace

2022-05-19 Thread GitBox


amaliujia commented on code in PR #36586:
URL: https://github.com/apache/spark/pull/36586#discussion_r877705884


##
sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala:
##
@@ -97,8 +97,18 @@ class CatalogImpl(sparkSession: SparkSession) extends 
Catalog {
*/
   @throws[AnalysisException]("database does not exist")
   override def listTables(dbName: String): Dataset[Table] = {
-val tables = sessionCatalog.listTables(dbName).map(makeTable)
-CatalogImpl.makeDataset(tables, sparkSession)
+if (sessionCatalog.databaseExists(dbName)) {
+  val tables = sessionCatalog.listTables(dbName).map(makeTable)
+  CatalogImpl.makeDataset(tables, sparkSession)
+} else {
+  val multiParts = 
sparkSession.sessionState.sqlParser.parseMultipartIdentifier(dbName)
+  val plan = ShowTables(UnresolvedNamespace(multiParts), None)
+  val ret = sparkSession.sessionState.executePlan(plan).toRdd.collect()
+  val tables = ret
+.map(row => TableIdentifier(row.getString(1), Some(row.getString(0
+.map(makeTable)

Review Comment:
   oh yes you are right. Let me add a version that looks up through analyzer.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org