nsivabalan commented on code in PR #12935:
URL: https://github.com/apache/hudi/pull/12935#discussion_r1985683446
##########
hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieBaseFileGroupRecordBuffer.java:
##########
@@ -424,8 +424,10 @@ protected Option<T> merge(Option<T> older, Map<String,
Object> olderInfoMap,
}
Comparable oldOrderingValue = readerContext.getOrderingValue(
older, olderInfoMap, readerSchema, orderingFieldName);
- if (!isDeleteRecordWithNaturalOrder(older, oldOrderingValue)
- && oldOrderingValue.compareTo(newOrderingValue) > 0) {
+ boolean choosePrev = !oldOrderingValue.equals(0)
+ && ReflectionUtils.isSameClass(oldOrderingValue,
newOrderingValue)
Review Comment:
Can you help me understand why we need this change?
Also, can you replace `0` with DEFAULT_ORDERING_VALUE?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseTableServicePlanActionExecutor.java:
##########
@@ -124,7 +124,9 @@ public Pair<Option<HoodieInstant>, Set<String>>
getIncrementalPartitions(TableSe
.filter(this::filterCommitByTableType).flatMap(instant -> {
try {
String completionTime = instant.getCompletionTime();
- if (completionTime.compareTo(leftBoundary) >= 0 &&
completionTime.compareTo(rightBoundary) < 0) {
+ if (completionTime.compareTo(leftBoundary) >= 0
+ && ((instant.requestedTime().length() <
completionTime.length() && instant.requestedTime().compareTo(rightBoundary) < 0)
Review Comment:
Can we add Javadocs, please?
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala:
##########
@@ -59,17 +60,16 @@ import scala.collection.mutable
* @param filters spark filters that might be pushed down into the
reader
* @param requiredFilters filters that are required and should always be
used, even in merging situations
*/
-class SparkFileFormatInternalRowReaderContext(parquetFileReader:
SparkParquetReader,
- filters: Seq[Filter],
- requiredFilters: Seq[Filter])
extends BaseSparkInternalRowReaderContext {
+class SparkFileFormatInternalRowReaderContext(parquetFileReader:
SparkParquetReader, filters: Seq[Filter],
+ requiredFilters: Seq[Filter],
tableVersion: HoodieTableVersion) extends BaseSparkInternalRowReaderContext {
lazy val sparkAdapter: SparkAdapter = SparkAdapterSupport.sparkAdapter
private lazy val bootstrapSafeFilters: Seq[Filter] =
filters.filter(filterIsSafeForBootstrap) ++ requiredFilters
private val deserializerMap: mutable.Map[Schema, HoodieAvroDeserializer] =
mutable.Map()
private val serializerMap: mutable.Map[Schema, HoodieAvroSerializer] =
mutable.Map()
private lazy val allFilters = filters ++ requiredFilters
override def supportsParquetRowIndex: Boolean = {
- HoodieSparkUtils.gteqSpark3_5
+ HoodieSparkUtils.gteqSpark3_5 &&
tableVersion.greaterThanOrEquals(HoodieTableVersion.EIGHT)
Review Comment:
Why is this required?
Or, in other words, why can't we support ParquetRowIndex with table
version 6?
##########
hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieBaseFileGroupRecordBuffer.java:
##########
@@ -424,8 +424,10 @@ protected Option<T> merge(Option<T> older, Map<String,
Object> olderInfoMap,
}
Comparable oldOrderingValue = readerContext.getOrderingValue(
older, olderInfoMap, readerSchema, orderingFieldName);
- if (!isDeleteRecordWithNaturalOrder(older, oldOrderingValue)
- && oldOrderingValue.compareTo(newOrderingValue) > 0) {
+ boolean choosePrev = !oldOrderingValue.equals(0)
+ && ReflectionUtils.isSameClass(oldOrderingValue,
newOrderingValue)
Review Comment:
Yes, let's add a test if we don't have this covered already.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]