carbondata git commit: [CARBONDATA-2103] Optimize show tables for filtering datamaps

manishgupta88 Thu, 22 Feb 2018 05:18:48 -0800

Repository: carbondata
Updated Branches:
  refs/heads/master cc4bc8100 -> 001795ce2



[CARBONDATA-2103] Optimize show tables for filtering datamaps

Problem
Show tables was slow because two lookups were performed per table to filter
out the datamaps.

Solution
Add a Hive table property (isVisible) that is true for main tables and false
for datamap tables such as preaggregate tables. Show tables then filters out
the datamap tables based on this property.

This closes #1980


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/001795ce
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/001795ce
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/001795ce

Branch: refs/heads/master
Commit: 001795ce28a3295d899deca96909b063aaf95cf1
Parents: cc4bc81
Author: akashrn5 <[email protected]>
Authored: Thu Feb 15 19:00:26 2018 +0530
Committer: manishgupta88 <[email protected]>
Committed: Thu Feb 22 18:50:35 2018 +0530

----------------------------------------------------------------------
 .../preaggregate/TestPreAggCreateCommand.scala  |  9 +++++
 .../testsuite/datamap/TestDataMapCommand.scala  |  7 +++-
 .../CreatePreAggregateTableCommand.scala        |  2 +-
 .../table/CarbonCreateTableCommand.scala        |  9 ++++-
 .../command/table/CarbonShowTablesCommand.scala | 40 ++++----------------
 5 files changed, 29 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/001795ce/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
index 6988adc..1e59a80 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
@@ -38,6 +38,7 @@ class TestPreAggCreateCommand extends QueryTest with 
BeforeAndAfterAll {
     sql("drop table if exists PreAggMain1")
     sql("drop table if exists PreAggMain2")
     sql("drop table if exists maintable")
+    sql("drop table if exists showTables")
     sql("create table preaggMain (a string, b string, c string) stored by 
'carbondata'")
     sql("create table preaggMain1 (a string, b string, c string) stored by 
'carbondata' tblProperties('DICTIONARY_INCLUDE' = 'a')")
     sql("create table preaggMain2 (a string, b string, c string) stored by 
'carbondata'")
@@ -376,6 +377,13 @@ class TestPreAggCreateCommand extends QueryTest with 
BeforeAndAfterAll {
     sql("DROP DATAMAP IF EXISTS agg0 ON TABLE maintable")
   }
 
+  test("test show tables filterted with datamaps"){
+    sql("create table showTables(name string, age int) stored by 'carbondata'")
+    sql("create datamap preAgg on table showTables using 'preaggregate' as 
select sum(age) from showTables")
+    sql("show tables").show()
+    assert(!sql("show tables").collect().contains("showTables_preagg"))
+  }
+
   def getCarbontable(plan: LogicalPlan) : CarbonTable ={
     var carbonTable : CarbonTable = null
     plan.transform {
@@ -403,5 +411,6 @@ class TestPreAggCreateCommand extends QueryTest with 
BeforeAndAfterAll {
     sql("drop table if exists PreAggMain1")
     sql("drop table if exists PreAggMain2")
     sql("drop table if exists maintabletime")
+    sql("drop table if exists showTables")
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/001795ce/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestDataMapCommand.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestDataMapCommand.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestDataMapCommand.scala
index 146ad62..f403b3e 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestDataMapCommand.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/TestDataMapCommand.scala
@@ -130,9 +130,12 @@ class TestDataMapCommand extends QueryTest with 
BeforeAndAfterAll {
         true,
         "datamap_hiveMetaStoreTable_1")
 
+      sql("drop datamap datamap_hiveMetaStoreTable_1 on table 
hiveMetaStoreTable_1")
+      checkExistence(sql("show datamap on table hiveMetaStoreTable_1"),
+        false,
+        "datamap_hiveMetaStoreTable_1")
+      assert(sql("show datamap on table 
hiveMetaStoreTable_1").collect().length == 0)
       sql("drop table hiveMetaStoreTable_1")
-
-      checkExistence(sql("show tables"), false, "datamap_hiveMetaStoreTable_1")
     }
     finally {
       CarbonProperties.getInstance()

http://git-wip-us.apache.org/repos/asf/carbondata/blob/001795ce/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/CreatePreAggregateTableCommand.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/CreatePreAggregateTableCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/CreatePreAggregateTableCommand.scala
index 46d885d..c861b00 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/CreatePreAggregateTableCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/CreatePreAggregateTableCommand.scala
@@ -117,7 +117,7 @@ case class CreatePreAggregateTableCommand(
       CarbonEnv.getTablePath(tableModel.databaseNameOp, 
tableModel.tableName)(sparkSession)
     }
     CarbonCreateTableCommand(TableNewProcessor(tableModel),
-      tableModel.ifNotExistsSet, Some(tablePath)).run(sparkSession)
+      tableModel.ifNotExistsSet, Some(tablePath), isVisible = 
false).run(sparkSession)
 
     val table = CarbonEnv.getCarbonTable(tableIdentifier)(sparkSession)
     val tableInfo = table.getTableInfo

http://git-wip-us.apache.org/repos/asf/carbondata/blob/001795ce/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
index c4030d6..fc7c13a 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
@@ -38,7 +38,8 @@ case class CarbonCreateTableCommand(
     tableInfo: TableInfo,
     ifNotExistsSet: Boolean = false,
     tableLocation: Option[String] = None,
-    createDSTable: Boolean = true)
+    createDSTable: Boolean = true,
+    isVisible: Boolean = true)
   extends MetadataCommand {
 
   override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
@@ -102,6 +103,9 @@ case class CarbonCreateTableCommand(
             } else {
               ""
             }
+          // isVisible property is added to hive table properties to 
differentiate between main
+          // table and datamaps(like preaggregate). It is false only for 
datamaps. This is added
+          // to improve the show tables performance when filtering the 
datamaps from main tables
           sparkSession.sql(
             s"""CREATE TABLE $dbName.$tableName
                |(${ rawSchema })
@@ -110,7 +114,8 @@ case class CarbonCreateTableCommand(
                |  tableName "$tableName",
                |  dbName "$dbName",
                |  tablePath "$tablePath",
-               |  path "$tablePath"
+               |  path "$tablePath",
+               |  isVisible "$isVisible"
                |  $carbonSchemaString)
                |  $partitionString
              """.stripMargin)

http://git-wip-us.apache.org/repos/asf/carbondata/blob/001795ce/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
index c2a91d8..e3c4e97 100644
--- 
a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
+++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
@@ -39,44 +39,18 @@ private[sql] case class CarbonShowTablesCommand ( 
databaseName: Option[String],
   override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
     // Since we need to return a Seq of rows, we will call getTables directly
     // instead of calling tables in sparkSession.
-    // filterDataMaps Method is to Filter the Table.
     val catalog = sparkSession.sessionState.catalog
     val db = databaseName.getOrElse(catalog.getCurrentDatabase)
     var tables =
       tableIdentifierPattern.map(catalog.listTables(db, 
_)).getOrElse(catalog.listTables(db))
-    tables = filterDataMaps(tables, sparkSession)
-    tables.map { tableIdent =>
-      val isTemp = catalog.isTemporaryTable(tableIdent)
-      Row(tableIdent.database.getOrElse("default"), tableIdent.table, isTemp)
+    val externalCatalog = sparkSession.sharedState.externalCatalog
+    // tables will be filtered for all the dataMaps to show only main tables
+    tables.collect {
+      case tableIdent if externalCatalog.getTable(db, 
tableIdent.table).storage.properties
+        .getOrElse("isVisible", true).toString.toBoolean =>
+        val isTemp = catalog.isTemporaryTable(tableIdent)
+        Row(tableIdent.database.getOrElse("default"), tableIdent.table, isTemp)
     }
   }
 
-  /**
-   *
-   * @param tables tableIdnetifers
-   * @param sparkSession sparksession
-   * @return  Tables after filter datamap tables
-   */
-  private def filterDataMaps(tables: Seq[TableIdentifier],
-      sparkSession: SparkSession): Seq[TableIdentifier] = {
-    // Filter carbon Tables then get CarbonTable and getDataMap List and 
filter the same
-    // as of now 2 times lookup is happening(filter  carbon table 
,getDataMapList)
-    // TODO : add another PR (CARBONDATA-2103) to improve  with 1 lookup
-    val allDatamapTable = tables.filter { table =>
-      CarbonEnv.getInstance(sparkSession).carbonMetastore
-        .tableExists(table)(sparkSession)
-    }.map { table =>
-      val ctable = CarbonEnv.getCarbonTable(table.database, 
table.table)(sparkSession)
-      ctable.getTableInfo.getDataMapSchemaList.asScala
-    }
-    val alldamrelation = allDatamapTable
-      .flatMap { table =>
-        table.map(eachtable => eachtable.getRelationIdentifier.toString)
-      }
-    tables
-      .filter { table =>
-        !alldamrelation
-          .contains(table.database.getOrElse("default") + "." + 
table.identifier)
-      }
-  }
 }

carbondata git commit: [CARBONDATA-2103] Optimize show tables for filtering datamaps

Reply via email to