danny0405 commented on code in PR #18018:
URL: https://github.com/apache/hudi/pull/18018#discussion_r2744622275
##########
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/RecordIndexCache.java:
##########
@@ -110,27 +126,39 @@ public void update(String recordKey,
HoodieRecordGlobalLocation recordGlobalLoca
ValidationUtils.checkArgument(!caches.isEmpty(), "record index cache
should not be empty.");
// Get the sub cache with the largest checkpoint ID (first entry in the
reverse-ordered TreeMap)
caches.firstEntry().getValue().put(recordKey, recordGlobalLocation);
+
+ if ((++recordCnt) % NUMBER_OF_RECORDS_TO_CHECK_MEMORY_SIZE == 0 &&
getInMemoryMapSize() >= this.maxCacheSizeInBytes) {
+ doClean();
+ recordCnt = 0;
+ }
}
/**
- * Clean all the cache entries for checkpoint whose id is less than the
given checkpoint id.
+ * Mark the cache entries cleanable, whose checkpoint id is less than the
given checkpoint id.
*
- * @param checkpointId the id of checkpoint
+ * @param checkpointId The checkpoint id for the minimum inflight instant
*/
- public void clean(long checkpointId) {
- NavigableMap<Long, ExternalSpillableMap<String,
HoodieRecordGlobalLocation>> subMap;
- if (checkpointId == Long.MAX_VALUE) {
- // clean all the cache entries for old checkpoint ids, and only keeps
the cache for the maximum checkpoint id,
- // which aims to clear memory while also ensuring a certain cache hit
rate
- subMap = caches.firstEntry() == null ? Collections.emptyNavigableMap() :
caches.tailMap(caches.firstKey(), false);
- } else {
- subMap = caches.tailMap(checkpointId, false);
+ public void markCleanable(long checkpointId) {
Review Comment:
Java has the notion of a soft reference, which marks an object as eligible
for garbage collection; maybe we can borrow from that for the naming here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]