ajantha-bhat commented on a change in pull request #3861:
URL: https://github.com/apache/carbondata/pull/3861#discussion_r481146873
##########
File path:
integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
##########
@@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession:
SparkSession) {
}
}
+ private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal,
sort: Sort,
+ filter: Filter): Unit = {
+ // 1. check all the filter columns present in SI
+ val originalFilterAttributes = filter.condition collect {
+ case attr: AttributeReference =>
+ attr.name.toLowerCase
+ }
+ val filterAttributes = filter.condition collect {
Review comment:
Agree — it was overlooked, I guess. We cannot compare here. I moved this
comparison into `createIndexFilterDataFrame`, where I decide `needPushDown`.
##########
File path:
integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
##########
@@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession:
SparkSession) {
}
}
+ private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal,
sort: Sort,
+ filter: Filter): Unit = {
+ // 1. check all the filter columns present in SI
+ val originalFilterAttributes = filter.condition collect {
+ case attr: AttributeReference =>
+ attr.name.toLowerCase
+ }
+ val filterAttributes = filter.condition collect {
+ case attr: AttributeReference => attr.name.toLowerCase
+ }
+ val indexTableRelation = MatchIndexableRelation.unapply(filter.child).get
+ val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables(
+ filterAttributes.toSet.asJava,
+
CarbonIndexUtil.getSecondaryIndexes(indexTableRelation).mapValues(_.toList.asJava).asJava)
+ .asScala
+ val databaseName = filter.child.asInstanceOf[LogicalRelation].relation
Review comment:
done
##########
File path:
integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
##########
@@ -824,6 +904,57 @@ class CarbonSecondaryIndexOptimizer(sparkSession:
SparkSession) {
}
}
+ private def checkIfPushDownOrderByLimitAndNotNullFilter(literal: Literal,
sort: Sort,
+ filter: Filter): Unit = {
+ // 1. check all the filter columns present in SI
+ val originalFilterAttributes = filter.condition collect {
+ case attr: AttributeReference =>
+ attr.name.toLowerCase
+ }
+ val filterAttributes = filter.condition collect {
+ case attr: AttributeReference => attr.name.toLowerCase
+ }
+ val indexTableRelation = MatchIndexableRelation.unapply(filter.child).get
+ val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables(
+ filterAttributes.toSet.asJava,
+
CarbonIndexUtil.getSecondaryIndexes(indexTableRelation).mapValues(_.toList.asJava).asJava)
+ .asScala
+ val databaseName = filter.child.asInstanceOf[LogicalRelation].relation
+ .asInstanceOf[CarbonDatasourceHadoopRelation].carbonRelation.databaseName
+ // filter out all the index tables which are disabled
+ val enabledMatchingIndexTables = matchingIndexTables
+ .filter(table => {
+ sparkSession.sessionState.catalog
+ .getTableMetadata(TableIdentifier(table,
+ Some(databaseName))).storage
+ .properties
+ .getOrElse("isSITableEnabled", "true").equalsIgnoreCase("true")
+ })
+ // 2. check if only one SI matches for the filter columns
+ if (enabledMatchingIndexTables.nonEmpty && enabledMatchingIndexTables.size
== 1 &&
+ filterAttributes.intersect(originalFilterAttributes).size ==
+ originalFilterAttributes.size) {
+ // 3. check if all the sort columns is in SI
+ val sortColumns = sort
+ .order
+ .map(_.child.asInstanceOf[AttributeReference].name.toLowerCase())
+ .toSet
+ val indexCarbonTable = CarbonEnv
+ .getCarbonTable(Some(databaseName),
enabledMatchingIndexTables.head)(sparkSession)
+ var allColumnsFound = true
Review comment:
done
##########
File path:
integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
##########
@@ -58,6 +58,16 @@ object NodeType extends Enumeration {
*/
class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) {
+ // to store the sort node per query
+ var sortNodeForPushDown: Sort = _
+
+ // to store the limit literal per query
+ var limitLiteral : Literal = _
+
+ // by default do not push down notNull filter,
+ // but for orderby limit push down, push down notNull filter also. Else we
get wrong results.
+ var pushDownNotNullFilter : Boolean = _
Review comment:
Because too many functions would need to change to pass the arguments through.
I used default arguments and changed the required places now, so it is a local
variable now.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]