danny0405 commented on code in PR #10344:
URL: https://github.com/apache/hudi/pull/10344#discussion_r1433432356
##########
hudi-common/src/main/java/org/apache/hudi/common/util/collection/ExternalSpillableMap.java:
##########
@@ -78,41 +78,49 @@ public class ExternalSpillableMap<T extends Serializable, R
extends Serializable
// Enables compression of values stored in disc
private final boolean isCompressionEnabled;
// current space occupied by this map in-memory
- private Long currentInMemoryMapSize;
+ private long currentInMemoryMapSize;
// An estimate of the size of each payload written to this map
private volatile long estimatedPayloadSize = 0;
// Base File Path
private final String baseFilePath;
- public ExternalSpillableMap(Long maxInMemorySizeInBytes, String
baseFilePath, SizeEstimator<T> keySizeEstimator,
+ public ExternalSpillableMap(long maxInMemorySizeInBytes, String
baseFilePath, SizeEstimator<T> keySizeEstimator,
SizeEstimator<R> valueSizeEstimator) throws
IOException {
this(maxInMemorySizeInBytes, baseFilePath, keySizeEstimator,
valueSizeEstimator, DiskMapType.BITCASK);
}
- public ExternalSpillableMap(Long maxInMemorySizeInBytes, String
baseFilePath, SizeEstimator<T> keySizeEstimator,
+ public ExternalSpillableMap(long maxInMemorySizeInBytes, String
baseFilePath, SizeEstimator<T> keySizeEstimator,
SizeEstimator<R> valueSizeEstimator, DiskMapType
diskMapType) throws IOException {
this(maxInMemorySizeInBytes, baseFilePath, keySizeEstimator,
valueSizeEstimator, diskMapType, false);
}
- public ExternalSpillableMap(Long maxInMemorySizeInBytes, String
baseFilePath, SizeEstimator<T> keySizeEstimator,
+ public ExternalSpillableMap(long maxInMemorySizeInBytes, String
baseFilePath, SizeEstimator<T> keySizeEstimator,
SizeEstimator<R> valueSizeEstimator, DiskMapType
diskMapType, boolean isCompressionEnabled) throws IOException {
this.inMemoryMap = new HashMap<>();
this.baseFilePath = baseFilePath;
- this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes *
sizingFactorForInMemoryMap);
+ this.maxInMemorySizeInBytes = (long) Math.floor(maxInMemorySizeInBytes *
SIZING_FACTOR_FOR_IN_MEMORY_MAP);
this.currentInMemoryMapSize = 0L;
this.keySizeEstimator = keySizeEstimator;
this.valueSizeEstimator = valueSizeEstimator;
this.diskMapType = diskMapType;
this.isCompressionEnabled = isCompressionEnabled;
}
+ private DiskMap<T, R> getDiskBasedMap() {
+ return getDiskBasedMap(false);
+ }
+
+ private DiskMap<T, R> getOrCreateDiskBasedMap() {
+ return getDiskBasedMap(true);
+ }
+
private DiskMap<T, R> getDiskBasedMap(boolean forceInitialization) {
if (null == diskBasedMap) {
- if (!forceInitialization) {
- return DiskMap.empty();
- }
synchronized (this) {
if (null == diskBasedMap) {
+ if (!forceInitialization) {
+ return DiskMap.empty();
Review Comment:
What `#contains` check are you referring to:
```java
@Override
public R put(T key, R value) {
if (this.estimatedPayloadSize == 0) {
// At first, use the sizeEstimate of a record being inserted into the
spillable map.
// Note, the converter may over-estimate the size of a record in the
JVM
this.estimatedPayloadSize = keySizeEstimator.sizeEstimate(key) +
valueSizeEstimator.sizeEstimate(value);
} else if (this.inMemoryMap.size() %
NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
this.estimatedPayloadSize = (long) (this.estimatedPayloadSize * 0.9 +
(keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(value)) *
0.1);
this.currentInMemoryMapSize = this.inMemoryMap.size() *
this.estimatedPayloadSize;
}
if (this.inMemoryMap.containsKey(key)) {
this.inMemoryMap.put(key, value);
} else if (this.currentInMemoryMapSize < this.maxInMemorySizeInBytes) {
this.currentInMemoryMapSize += this.estimatedPayloadSize;
// Remove the old version of the record from disk first to avoid data
duplication.
if (inDiskContainsKey(key)) {
getDiskBasedMap().remove(key);
}
this.inMemoryMap.put(key, value);
} else {
getDiskBasedMap().put(key, value);
}
return value;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]