nsivabalan commented on code in PR #12707:
URL: https://github.com/apache/hudi/pull/12707#discussion_r1929980758


##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/PartitionStatsIndexSupport.scala:
##########
@@ -86,47 +85,77 @@ class PartitionStatsIndexSupport(spark: SparkSession,
                       queryFilters: Seq[Expression],
                       queryReferencedColumns: Seq[String]): 
Option[Set[String]] = {
     if (isIndexAvailable && queryFilters.nonEmpty && 
queryReferencedColumns.nonEmpty) {
-      val readInMemory = shouldReadInMemory(fileIndex, queryReferencedColumns, 
inMemoryProjectionThreshold)
-      loadTransposed(queryReferencedColumns, readInMemory, Option.empty, 
Option.empty) {
-        transposedPartitionStatsDF => {
-          try {
-            
transposedPartitionStatsDF.persist(StorageLevel.MEMORY_AND_DISK_SER)
-            val allPartitions = 
transposedPartitionStatsDF.select(HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME)
-              .collect()
-              .map(_.getString(0))
-              .toSet
-            if (allPartitions.nonEmpty) {
-              // PARTITION_STATS index exist for all or some columns in the 
filters
-              // NOTE: [[translateIntoColumnStatsIndexFilterExpr]] has covered 
the case where the
-              //       column in a filter does not have the stats available, 
by making sure such a
-              //       filter does not prune any partition.
-              val indexSchema = transposedPartitionStatsDF.schema
-              val indexedCols: Seq[String] = 
metaClient.getIndexMetadata.get().getIndexDefinitions.get(PARTITION_NAME_COLUMN_STATS).getSourceFields.asScala.toSeq
-              // to be fixed. HUDI-8836.
-              val indexFilter = 
queryFilters.map(translateIntoColumnStatsIndexFilterExpr(_, indexedCols = 
indexedCols)).reduce(And)
-              if (indexFilter.equals(TrueLiteral)) {
-                // if there are any non indexed cols or we can't translate 
source expr, we can prune partitions based on col stats lookup.
-                Some(allPartitions)
+      if (containsAnySqlFunction(queryFilters)) {
+        // If the query contains any SQL function, skip the pruning.
+        // Expression Index will be used in such cases, if available.
+        Option.empty
+      } else {

Review Comment:
   gotcha



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to