vinothchandar commented on a change in pull request #2926:
URL: https://github.com/apache/hudi/pull/2926#discussion_r638929143
##########
File path:
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala
##########
@@ -131,15 +133,28 @@ class MergeOnReadSnapshotRelation(val sqlContext:
SQLContext,
rdd.asInstanceOf[RDD[Row]]
}
- def buildFileIndex(): List[HoodieMergeOnReadFileSplit] = {
+ def buildFileIndex(filters: Array[Filter]): List[HoodieMergeOnReadFileSplit]
= {
+
val fileStatuses = if (globPaths.isDefined) {
// Load files from the global paths if it has defined to be compatible
with the original mode
val inMemoryFileIndex =
HoodieSparkUtils.createInMemoryFileIndex(sqlContext.sparkSession, globPaths.get)
inMemoryFileIndex.allFiles()
} else { // Load files by the HoodieFileIndex.
val hoodieFileIndex = HoodieFileIndex(sqlContext.sparkSession,
metaClient,
Some(tableStructSchema), optParams,
FileStatusCache.getOrCreate(sqlContext.sparkSession))
- hoodieFileIndex.allFiles
+
+ // Get partition filter and convert to catalyst expression
+ val partitionColumns = hoodieFileIndex.partitionSchema.fieldNames.toSet
+ val partitionFilters= filters.filter(f => f.references.forall(p =>
partitionColumns.contains(p)))
Review comment:
nit: space before `=`
##########
File path:
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala
##########
@@ -182,4 +197,98 @@ object MergeOnReadSnapshotRelation {
// when create PartitionedFile.
path.toUri.toString
}
+
+ /**
+ * Convert Filters to Catalyst Expressions and joined by And. If convert
success return an
+ * Non-Empty Option[Expression],or else return None.
+ */
+ def convertToCatalystExpressions(filters: Array[Filter],
Review comment:
Can we encapsulate this conversion logic into its own class? I could see
general use for this beyond just partition pruning.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org