wangyum commented on issue #24774: [SPARK-27899][SQL] Make 
HiveMetastoreClient.getTableObjectsByName available in 
ExternalCatalog/SessionCatalog API
URL: https://github.com/apache/spark/pull/24774#issuecomment-500339944
 
 
   I ran a simple benchmark in our production environment (the **default** 
database has `1626` tables):
   ```scala
   cat <<EOF > SPARK-27899.scala
   /**
    * Invokes `func` `iterations` times back to back and reports the total elapsed time.
    *
    * @param func       the side-effecting action to time
    * @param iterations how many times `func` is invoked; defaults to 2
    * @return the total elapsed time across all invocations, in milliseconds
    */
   def benchmark(func: () => Unit, iterations: Int = 2): Long = {
     // System.nanoTime is monotonic, so wall-clock adjustments cannot skew the measurement.
     val start = System.nanoTime()
     (0 until iterations).foreach(_ => func())
     java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)
   }
   
   /**
    * Baseline path: lists the tables of the "default" database and looks up each
    * table's metadata with its own `getTableMetadata` call, collecting one row per table.
    * The collected rows are discarded; the method exists only to be timed.
    */
   def default(): Unit = {
     val rows = new java.util.ArrayList[Array[AnyRef]]()
     val sessionCatalog = spark.sessionState.catalog
     for (ident <- sessionCatalog.listTables("default")) {
       val meta = sessionCatalog.getTableMetadata(ident)
       rows.add(Array[AnyRef](
         "",
         meta.database,
         meta.identifier.table,
         meta.tableType,
         meta.comment.getOrElse("")))
     }
   }
   
   /**
    * Batched path: lists the tables of the "default" database and resolves all of
    * their metadata through a single `getTablesByName` call, collecting one row per table.
    * The collected rows are discarded; the method exists only to be timed.
    */
   def spark_27899(): Unit = {
     val rows = new java.util.ArrayList[Array[AnyRef]]()
     val sessionCatalog = spark.sessionState.catalog
     val tables = sessionCatalog.getTablesByName(sessionCatalog.listTables("default"))
     for (meta <- tables) {
       rows.add(Array[AnyRef](
         "",
         meta.database,
         meta.identifier.table,
         meta.tableType,
         meta.comment.getOrElse("")))
     }
   }
   
   // Time both code paths and print the elapsed values that benchmark() returns.
   // The empty-paren methods are called with explicit (): relying on auto-application
   // is deprecated in Scala 2.13 and rejected by Scala 3.
   val defaultTimeToken = benchmark(() => default())
   val spark27899TimeToken = benchmark(() => spark_27899())
   println(s"Default time token: $defaultTimeToken")
   println(s"SPARK-27899 time token: $spark27899TimeToken")
   EOF
   ```
   Benchmark result (total elapsed milliseconds for two runs of each path):
   ```
   Default time token: 317983
   SPARK-27899 time token: 58977
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to