rdblue commented on a change in pull request #24768: [SPARK-27919][SQL] Add v2 session catalog
URL: https://github.com/apache/spark/pull/24768#discussion_r292122308
##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
##########
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import java.util
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalog.v2.{Identifier, TableCatalog, TableChange}
+import org.apache.spark.sql.catalog.v2.expressions.{BucketTransform, FieldReference, IdentityTransform, LogicalExpressions, Transform}
+import org.apache.spark.sql.catalog.v2.utils.CatalogV2Util
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException}
+import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTableType, CatalogUtils, SessionCatalog}
+import org.apache.spark.sql.execution.datasources.DataSource
+import org.apache.spark.sql.internal.SessionState
+import org.apache.spark.sql.sources.v2.{Table, TableCapability, TableProvider}
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
+
+/**
+ * A [[TableCatalog]] that translates calls to the v1 SessionCatalog.
+ */
+class V2SessionCatalog(sessionState: SessionState) extends TableCatalog {
+  def this() = {
+    this(SparkSession.active.sessionState)
+  }
+
+  private lazy val catalog: SessionCatalog = sessionState.catalog
+
+  private var _name: String = _
+
+  override def name: String = _name
+
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
+    this._name = name
+  }
+
+  override def listTables(namespace: Array[String]): Array[Identifier] = {
+    namespace match {
+      case Array(db) =>
+        catalog.listTables(db).map(ident => Identifier.of(Array(db), ident.table)).toArray
+      case _ =>
+        throw new NoSuchNamespaceException(namespace)
+    }
+  }
+
+  def loadTable(ident: Identifier): Table = {
+    val catalogTable = try {
+      catalog.getTableMetadata(ident.asTableIdentifier)
+    } catch {
+      case _: NoSuchTableException =>
+        throw new NoSuchTableException(ident)
+    }
+
+    catalogTable.provider match {
+      case Some(provider) =>
+        DataSource.lookupDataSource(provider, sessionState.conf).newInstance() match {
+          case tableProvider: TableProvider =>
+            val storageOptions = ident.namespace match {
+              case Array(db) =>
+                catalogTable.storage.properties +
+                    ("provider" -> provider) +
+                    ("database" -> db) +
+                    ("table" -> ident.name)
+              case Array() =>
+                catalogTable.storage.properties +
+                    ("provider" -> provider) +
+                    ("database" -> catalog.getCurrentDatabase) +
+                    ("table" -> ident.name)
+            }
+            tableProvider.getTable(new CaseInsensitiveStringMap(storageOptions.asJava))

Review comment:
   No, that is not the correct solution for SPARK-27960. The user-supplied schema is used when interpreting data that has no inherent schema, like CSV or JSON; that's why it is called the user-supplied schema. In this case, the schema is stored in the metastore, and the table provider can access it from the session metastore using the `table` and `database` options. I agree that this is strange and isn't a good way to handle these tables. I think it would be a good idea to change the `TableProvider` interface to accept more metadata from the metastore, but we should weigh those options first and should not reuse the user-supplied schema for something other than what the API was intended for.
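   To make the lookup path concrete, here is a minimal sketch of a `TableProvider` that recovers its schema from the session metastore, as described above. The option keys (`database`, `table`) come straight from the diff; the class name and the inline `Table` stub are illustrative, not part of the PR:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.sources.v2.{Table, TableCapability, TableProvider}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Illustrative provider, not part of this PR.
class MetastoreBackedProvider extends TableProvider {

  override def getTable(options: CaseInsensitiveStringMap): Table = {
    // These keys are injected by V2SessionCatalog.loadTable in the diff above.
    val db = options.get("database")
    val table = options.get("table")

    // Read the schema from the v1 session catalog instead of relying on a
    // user-supplied schema, which is reserved for schemaless data (CSV, JSON).
    val metadata = SparkSession.active.sessionState.catalog
      .getTableMetadata(TableIdentifier(table, Option(db)))
    val tableSchema = metadata.schema

    // Illustrative stub: a real provider would return its own Table implementation.
    new Table {
      override def name(): String = s"$db.$table"
      override def schema(): StructType = tableSchema
      override def capabilities(): java.util.Set[TableCapability] =
        java.util.Collections.emptySet()
    }
  }
}
```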

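   And a purely hypothetical sketch of the interface change suggested above — none of these names exist in Spark; it only illustrates handing the metastore metadata to the provider directly instead of passing it through storage options:

```scala
import org.apache.spark.sql.catalog.v2.expressions.Transform
import org.apache.spark.sql.sources.v2.Table
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Hypothetical extension point, not part of this PR or of Spark.
trait CatalogAwareTableProvider {

  /**
   * The catalog would pass the schema, partitioning, and table properties it
   * already read from the metastore, so the provider never needs to reuse the
   * user-supplied schema or re-query the session catalog itself.
   */
  def getTable(
      schema: StructType,
      partitioning: Array[Transform],
      properties: java.util.Map[String, String],
      options: CaseInsensitiveStringMap): Table
}
```

   Passing the metadata explicitly would make the contract visible in the signature rather than relying on well-known option keys.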