hvanhovell commented on code in PR #40438: URL: https://github.com/apache/spark/pull/40438#discussion_r1138910434
########## connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala: ########## @@ -0,0 +1,628 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalog + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset} +import org.apache.spark.sql.types.StructType + +/** + * Catalog interface for Spark. To access this, use `SparkSession.catalog`. + * + * @since 3.4.0 + */ +abstract class Catalog { + + /** + * Returns the current database (namespace) in this session. + * + * @since 3.4.0 + */ + def currentDatabase: String + + /** + * Sets the current database (namespace) in this session. + * + * @since 3.4.0 + */ + def setCurrentDatabase(dbName: String): Unit + + /** + * Returns a list of databases (namespaces) available within the current catalog. + * + * @since 3.4.0 + */ + def listDatabases(): Dataset[Database] + + /** + * Returns a list of tables/views in the current database (namespace). This includes all + * temporary views. + * + * @since 3.4.0 + */ + def listTables(): Dataset[Table] + + /** + * Returns a list of tables/views in the specified database (namespace) (the name can be + * qualified with catalog). This includes all temporary views. + * + * @since 3.4.0 + */ + @throws[AnalysisException]("database does not exist") + def listTables(dbName: String): Dataset[Table] + + /** + * Returns a list of functions registered in the current database (namespace). This includes all + * temporary functions. + * + * @since 3.4.0 + */ + def listFunctions(): Dataset[Function] + + /** + * Returns a list of functions registered in the specified database (namespace) (the name can be + * qualified with catalog). This includes all built-in and temporary functions. + * + * @since 3.4.0 + */ + @throws[AnalysisException]("database does not exist") + def listFunctions(dbName: String): Dataset[Function] + + /** + * Returns a list of columns for the given table/view or temporary view. + * + * @param tableName + * is either a qualified or unqualified name that designates a table/view. It follows the same + * resolution rule with SQL: search for temp views first then table/views in the current + * database (namespace). + * @since 3.4.0 + */ + @throws[AnalysisException]("table does not exist") + def listColumns(tableName: String): Dataset[Column] + + /** + * Returns a list of columns for the given table/view in the specified database under the Hive + * Metastore. + * + * To list columns for table/view in other catalogs, please use `listColumns(tableName)` with + * qualified table/view name instead. + * + * @param dbName + * is an unqualified name that designates a database. + * @param tableName + * is an unqualified name that designates a table/view. + * @since 3.4.0 + */ + @throws[AnalysisException]("database or table does not exist") + def listColumns(dbName: String, tableName: String): Dataset[Column] + + /** + * Get the database (namespace) with the specified name (can be qualified with catalog). This + * throws an AnalysisException when the database (namespace) cannot be found. + * + * @since 3.4.0 + */ + @throws[AnalysisException]("database does not exist") + def getDatabase(dbName: String): Database + + /** + * Get the table or view with the specified name. This table can be a temporary view or a + * table/view. This throws an AnalysisException when no Table can be found. + * + * @param tableName + * is either a qualified or unqualified name that designates a table/view. It follows the same + * resolution rule with SQL: search for temp views first then table/views in the current + * database (namespace). + * @since 3.4.0 + */ + @throws[AnalysisException]("table does not exist") + def getTable(tableName: String): Table + + /** + * Get the table or view with the specified name in the specified database under the Hive + * Metastore. This throws an AnalysisException when no Table can be found. + * + * To get table/view in other catalogs, please use `getTable(tableName)` with qualified + * table/view name instead. + * + * @since 3.4.0 + */ + @throws[AnalysisException]("database or table does not exist") + def getTable(dbName: String, tableName: String): Table + + /** + * Get the function with the specified name. This function can be a temporary function or a + * function. This throws an AnalysisException when the function cannot be found. + * + * @param functionName + * is either a qualified or unqualified name that designates a function. It follows the same + * resolution rule with SQL: search for built-in/temp functions first then functions in the + * current database (namespace). + * @since 3.4.0 + */ + @throws[AnalysisException]("function does not exist") + def getFunction(functionName: String): Function + + /** + * Get the function with the specified name in the specified database under the Hive Metastore. + * This throws an AnalysisException when the function cannot be found. + * + * To get functions in other catalogs, please use `getFunction(functionName)` with qualified + * function name instead. + * + * @param dbName + * is an unqualified name that designates a database. + * @param functionName + * is an unqualified name that designates a function in the specified database + * @since 3.4.0 + */ + @throws[AnalysisException]("database or function does not exist") + def getFunction(dbName: String, functionName: String): Function + + /** + * Check if the database (namespace) with the specified name exists (the name can be qualified + * with catalog). + * + * @since 3.4.0 + */ + def databaseExists(dbName: String): Boolean + + /** + * Check if the table or view with the specified name exists. This can either be a temporary + * view or a table/view. + * + * @param tableName + * is either a qualified or unqualified name that designates a table/view. It follows the same + * resolution rule with SQL: search for temp views first then table/views in the current + * database (namespace). + * @since 3.4.0 + */ + def tableExists(tableName: String): Boolean + + /** + * Check if the table or view with the specified name exists in the specified database under the + * Hive Metastore. + * + * To check existence of table/view in other catalogs, please use `tableExists(tableName)` with + * qualified table/view name instead. + * + * @param dbName + * is an unqualified name that designates a database. + * @param tableName + * is an unqualified name that designates a table. + * @since 3.4.0 + */ + def tableExists(dbName: String, tableName: String): Boolean + + /** + * Check if the function with the specified name exists. This can either be a temporary function + * or a function. + * + * @param functionName + * is either a qualified or unqualified name that designates a function. It follows the same + * resolution rule with SQL: search for built-in/temp functions first then functions in the + * current database (namespace). + * @since 3.4.0 + */ + def functionExists(functionName: String): Boolean + + /** + * Check if the function with the specified name exists in the specified database under the Hive + * Metastore. + * + * To check existence of functions in other catalogs, please use `functionExists(functionName)` + * with qualified function name instead. + * + * @param dbName + * is an unqualified name that designates a database. + * @param functionName + * is an unqualified name that designates a function. + * @since 3.4.0 + */ + def functionExists(dbName: String, functionName: String): Boolean + + /** + * Creates a table from the given path and returns the corresponding DataFrame. It will use the + * default data source configured by spark.sql.sources.default. + * + * @param tableName + * is either a qualified or unqualified name that designates a table. If no database + * identifier is provided, it refers to a table in the current database. + * @since 3.4.0 + */ + @deprecated("use createTable instead.", "2.2.0") Review Comment: Yeah let's keep them for now. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
