akashrn5 commented on a change in pull request #3902:
URL: https://github.com/apache/carbondata/pull/3902#discussion_r481925690
##########
File path:
integration/spark/src/main/scala/org/apache/spark/sql/optimizer/CarbonFilters.scala
##########
@@ -373,6 +375,164 @@ object CarbonFilters {
val carbonTable = CarbonEnv.getCarbonTable(identifier)(sparkSession)
getPartitions(partitionFilters, sparkSession, carbonTable)
}
+
+ def getStorageOrdinal(filter: Filter, carbonTable: CarbonTable): Int = {
+ val column = filter.references.map(carbonTable.getColumnByName)
+ if (column.isEmpty) {
+ -1
+ } else {
+ if (column.head.isDimension) {
+ column.head.getOrdinal
+ } else {
+ column.head.getOrdinal + carbonTable.getAllDimensions.size()
+ }
+ }
+ }
+
+ def collectSimilarExpressions(filter: Filter, table: CarbonTable):
Seq[(Filter, Int)] = {
+ filter match {
+ case sources.And(left, right) =>
+ collectSimilarExpressions(left, table) ++
collectSimilarExpressions(right, table)
+ case sources.Or(left, right) => collectSimilarExpressions(left, table) ++
+ collectSimilarExpressions(right, table)
+ case others => Seq((others, getStorageOrdinal(others, table)))
+ }
+ }
+
+ /**
+ * This method will reorder the filter based on the Storage Ordinal of the
column references.
+ *
+ * Example1:
+ * And And
+ * Or And => Or And
+ * col3 col1 col2 col1 col1 col3 col1 col2
+ *
+ * **Mixed expression filter reordered locally, but wont be reordered
globally.**
+ *
+ * Example2:
+ * And And
+ * And And => And And
+ * col3 col1 col2 col1 col1 col1 col2 col3
+ *
+ * Or Or
+ * Or Or => Or Or
+ * col3 col1 col2 col1 col1 col1 col2 col3
+ *
+ * **Similar expression filters are reordered globally**
+ *
+ * @param filter the filter expression to be reordered
+ * @return The reordered filter with the current ordinal
+ */
+ def reorderFilter(filter: Filter, table: CarbonTable): (Filter, Int) = {
+ val filterMap = mutable.HashMap[String, List[(Filter, Int)]]()
+ // If the filter size is one then no need to reorder.
+ val sortedFilter = if (filter.references.toSet.size == 1) {
+ (filter, -1)
+ } else {
+ sortFilter(filter, filterMap, table)
+ }
+ // If filter has only AND/PR expression then sort the nodes globally using
the filterMap.
+ // Else sort the subnodes individually
+ if (!filterMap.contains("OR") && filterMap.contains("AND") &&
filterMap("AND").nonEmpty) {
+ val sortedFilterAndOrdinal = filterMap("AND").sortBy(_._2)
+ (sortedFilterAndOrdinal.map(_._1).reduce(sources.And),
sortedFilterAndOrdinal.head._2)
+ } else if (!filterMap.contains("AND") && filterMap.contains("OR") &&
+ filterMap("OR").nonEmpty) {
+ val sortedFilterAndOrdinal = filterMap("OR").sortBy(_._2)
+ (sortedFilterAndOrdinal.map(_._1).reduce(sources.Or),
sortedFilterAndOrdinal.head._2)
+ } else {
+ sortedFilter
+ }
+ }
+
+ def sortFilter(filter: Filter, filterMap: mutable.HashMap[String,
List[(Filter, Int)]],
+ table: CarbonTable): (Filter, Int) = {
+ filter match {
+ case sources.And(left, right) =>
Review comment:
As we can see, both `case sources.And(left, right)` and
`case sources.Or(left, right)` have the same implementation except for the
filter names, and the traversal is also the same. I think we can extract the
complete case implementation into a method and pass the filter type as a
parameter, which would reduce the code and make it simpler. Can you check
whether that is possible?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]