gatorsmile commented on a change in pull request #24774: [SPARK-27899][SQL] 
Make HiveMetastoreClient.getTableObjectsByName available in 
ExternalCatalog/SessionCatalog API
URL: https://github.com/apache/spark/pull/24774#discussion_r290541004
 
 

 ##########
 File path: sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
 ##########
 @@ -384,127 +388,144 @@ private[hive] class HiveClientImpl(
     Option(client.getTable(dbName, tableName, false /* do not throw exception */))
   }
 
+  private def getRawTablesByNames(dbName: String, tableNames: Seq[String]): Seq[HiveTable] = {
+    try {
+      msClient.getTableObjectsByName(dbName, tableNames.asJava).asScala.map(new HiveTable(_))
+    } catch {
+      case ex: Exception =>
+        throw new HiveException(s"Unable to fetch tables of db $dbName", ex)
+    }
+  }
+
   override def tableExists(dbName: String, tableName: String): Boolean = withHiveState {
     getRawTableOption(dbName, tableName).nonEmpty
   }
 
+  override def getTablesByName(
+      dbName: String,
+      tableNames: Seq[String]): Seq[CatalogTable] = withHiveState {
+    getRawTablesByNames(dbName, tableNames).map(convertHiveTableToCatalogTable)
+  }
+
   override def getTableOption(
       dbName: String,
       tableName: String): Option[CatalogTable] = withHiveState {
     logDebug(s"Looking up $dbName.$tableName")
-    getRawTableOption(dbName, tableName).map { h =>
-      // Note: Hive separates partition columns and the schema, but for us the
-      // partition columns are part of the schema
-      val cols = h.getCols.asScala.map(fromHiveColumn)
-      val partCols = h.getPartCols.asScala.map(fromHiveColumn)
-      val schema = StructType(cols ++ partCols)
-
-      val bucketSpec = if (h.getNumBuckets > 0) {
-        val sortColumnOrders = h.getSortCols.asScala
-        // Currently Spark only supports columns to be sorted in ascending order
-        // but Hive can support both ascending and descending order. If all the columns
-        // are sorted in ascending order, only then propagate the sortedness information
-        // to downstream processing / optimizations in Spark
-        // TODO: In future we can have Spark support columns sorted in descending order
-        val allAscendingSorted = sortColumnOrders.forall(_.getOrder == HIVE_COLUMN_ORDER_ASC)
-
-        val sortColumnNames = if (allAscendingSorted) {
-          sortColumnOrders.map(_.getCol)
-        } else {
-          Seq.empty
-        }
-        Option(BucketSpec(h.getNumBuckets, h.getBucketCols.asScala, sortColumnNames))
+    getRawTableOption(dbName, tableName).map(convertHiveTableToCatalogTable)
+  }
+
+  private def convertHiveTableToCatalogTable(h: HiveTable): CatalogTable = {
 
 Review comment:
   Nope. You can reuse this issue. 
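
   For context on what the batch lookup enables, a minimal usage sketch, assuming the SessionCatalog-level signature this PR proposes (getTablesByName taking a Seq[TableIdentifier]); the session setup and db/table names here are illustrative, not part of the patch:

       // Hypothetical caller: fetch metadata for several tables in one
       // metastore round trip rather than one lookup call per table.
       import org.apache.spark.sql.SparkSession
       import org.apache.spark.sql.catalyst.TableIdentifier
       import org.apache.spark.sql.catalyst.catalog.CatalogTable

       val spark = SparkSession.builder().enableHiveSupport().getOrCreate()
       val catalog = spark.sessionState.catalog

       // Batch lookup; "db1", "t1", "t2" are placeholder names.
       val tables: Seq[CatalogTable] = catalog.getTablesByName(Seq(
         TableIdentifier("t1", Some("db1")),
         TableIdentifier("t2", Some("db1"))))
       tables.foreach(t => println(t.identifier))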

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services
