This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch compaction_review
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/compaction_review by this push:
new 873a41b0241 temp save
873a41b0241 is described below
commit 873a41b0241182d004e287926fa296a9555bddea
Author: jt2594838 <[email protected]>
AuthorDate: Tue Jun 18 12:11:07 2024 +0800
temp save
---
.../utils/CrossSpaceCompactionCandidate.java | 152 +++++++++++----------
1 file changed, 82 insertions(+), 70 deletions(-)
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
index 7935ea4ddf8..ce8f91a9ec5 100644
---
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
@@ -90,78 +90,11 @@ public class CrossSpaceCompactionCandidate {
return false;
}
for (DeviceInfo unseqDeviceInfo : unseqFile.getDevices()) {
- IDeviceID deviceId = unseqDeviceInfo.deviceId;
- boolean atLeastOneSeqFileSelected = false;
- // The `previousSeqFile` means the seqFile which contains the device and
its endTime is just
- // be smaller than startTime of the device in unseqFile
- TsFileResourceCandidate previousSeqFile = null;
-
- // if a seq file actually overlaps or effectively overlaps with the
unseq file, the seq
- // file should be merged with the unseq file
- // e.g.: two seq files seq1 and seq2. seq1 ranges [0, 100] and seq2
ranges [200, 300]. One
- // unseq file unseq1 ranges [80, 150]. Then the [80, 100] part from
unseq1 actually
- // overlaps with seq1, while the [101, 150] part effectively overlaps
with seq2.
- for (TsFileResourceCandidate seqFile : seqFiles) {
- // If the seqFile may need to be selected but its invalid, the
selection should be
- // terminated.
- if ((!seqFile.isValidCandidate || !seqFile.hasDetailedDeviceInfo())
- && seqFile.mayHasOverlapWithUnseqFile(unseqDeviceInfo)) {
- return false;
- }
- if (!seqFile.containsDevice(deviceId)) {
- continue;
- }
- DeviceInfo seqDeviceInfo = seqFile.getDeviceInfoById(deviceId);
-
- // If the unsealed file is unclosed, the file should not be selected
only when its startTime
- // is larger than endTime of unseqFile. Or, the selection should be
terminated.
- if (seqFile.unsealed() && unseqDeviceInfo.endTime >=
seqDeviceInfo.startTime) {
- return false;
- }
- if (unseqDeviceInfo.endTime <= seqDeviceInfo.endTime) {
- // When scanning the target seqFiles for unseqFile, we traverse them
one by one no matter
- // whether it is selected or not. But we only add the unselected
seqFiles to next split to
- // avoid duplication selection
- tmpSplit.addSeqFileIfNotSelected(seqFile);
- seqFile.markAsSelected();
- atLeastOneSeqFileSelected = true;
- break;
- } else if (unseqDeviceInfo.startTime <= seqDeviceInfo.endTime) {
- tmpSplit.addSeqFileIfNotSelected(seqFile);
- seqFile.markAsSelected();
- atLeastOneSeqFileSelected = true;
- } else {
- if (!seqFile.unsealed()) {
- previousSeqFile = seqFile;
- }
- }
- }
-
- // If: 1.some seq files are removed by deletion or TTL; 2. the unseq
file is loaded; the
- // unseq file may have larger timestamps and does not overlap with any
seqFile.
- //
- // For example, previously we have two seqFiles, ranging [0, 100] [200,
300] respectively.
- // Then the seqFile of [200, 300] is deleted and an unseqFile of [150,
180] is written/loaded.
- // The unseqFile does not overlap with any seqFile and not seqFile has
larger timestamp
- // than it, thus the unseqFile cannot select any candidate seqFiles
throughput the loop above.
- //
- // In this case, the unseqFile should be merged with the last seqFile if
possible.
- // TODO: let insertionCompaction handle this case
-
- // That this judgement is true indicates `previousSeqFile` is
unnecessary.
- if (atLeastOneSeqFileSelected || previousSeqFile == null) {
- continue;
- }
-
- // That this judgement is ture indicates the `previousSeqFile` is
necessary, but it cannot be
- // selected as a candidate so the selection should be terminated.
- if (!previousSeqFile.isValidCandidate) {
+ // select candidates for each device; abort the selection as soon as one device fails
+ boolean canContinue = selectCandidateByDevice(unseqDeviceInfo, tmpSplit);
+ if (!canContinue) {
return false;
}
-
- // select the `previousSeqFile`
- tmpSplit.addSeqFileIfNotSelected(previousSeqFile);
- previousSeqFile.markAsSelected();
}
// mark candidates in next split as selected even though it may not be
added to the final
// TaskResource
@@ -183,6 +116,84 @@ public class CrossSpaceCompactionCandidate {
return true;
}
+  /**
+   * Selects, for one device of the unseq file, the seq files that have to be compacted together
+   * with it and adds them to {@code tmpSplit}.
+   *
+   * @param unseqDeviceInfo the device of the unseq file and its time range
+   * @param tmpSplit the split that collects the seq files selected for the unseq file
+   * @return true if the selection can continue with the next device, false if it must be
+   *     terminated because a seq file that is needed cannot be selected
+   * @throws IOException presumably propagated from the seq file device lookups; confirm against
+   *     the callees
+   */
+  private boolean selectCandidateByDevice(
+      DeviceInfo unseqDeviceInfo, CrossCompactionTaskResourceSplit tmpSplit)
+      throws IOException {
+    IDeviceID deviceId = unseqDeviceInfo.deviceId;
+    boolean atLeastOneSeqFileSelected = false;
+    // The `previousSeqFile` is the sealed seqFile that contains the device and whose endTime is
+    // just smaller than the startTime of the device in the unseqFile.
+    TsFileResourceCandidate previousSeqFile = null;
+
+    // If a seq file actually overlaps or effectively overlaps with the unseq file, the seq file
+    // should be merged with the unseq file.
+    // e.g.: two seq files seq1 and seq2. seq1 ranges [0, 100] and seq2 ranges [200, 300].
+    // One unseq file unseq1 ranges [80, 150]. Then the [80, 100] part of unseq1 actually
+    // overlaps with seq1, while the [101, 150] part effectively overlaps with seq2.
+    for (TsFileResourceCandidate seqFile : seqFiles) {
+      // If the seqFile may need to be selected but it is invalid, the selection should be
+      // terminated.
+      if ((!seqFile.isValidCandidate || !seqFile.hasDetailedDeviceInfo())
+          && seqFile.mayHasOverlapWithUnseqFile(unseqDeviceInfo)) {
+        return false;
+      }
+      if (!seqFile.containsDevice(deviceId)) {
+        // This seq file is irrelevant for the device, but a later one may still overlap, so
+        // keep scanning instead of leaving the method.
+        continue;
+      }
+      DeviceInfo seqDeviceInfo = seqFile.getDeviceInfoById(deviceId);
+
+      // An unsealed seq file cannot be selected. It may be skipped only when its startTime is
+      // larger than the endTime of the device in the unseqFile; otherwise the selection should
+      // be terminated.
+      if (seqFile.unsealed() && unseqDeviceInfo.endTime >= seqDeviceInfo.startTime) {
+        return false;
+      }
+      if (unseqDeviceInfo.endTime <= seqDeviceInfo.endTime) {
+        // When scanning the target seqFiles for the unseqFile, we traverse them one by one no
+        // matter whether they are selected or not. But we only add the unselected seqFiles to
+        // the next split to avoid duplicated selection.
+        tmpSplit.addSeqFileIfNotSelected(seqFile);
+        seqFile.markAsSelected();
+        atLeastOneSeqFileSelected = true;
+        // The unseq device ends inside this seq file, so no later seq file is needed.
+        break;
+      } else if (unseqDeviceInfo.startTime <= seqDeviceInfo.endTime) {
+        tmpSplit.addSeqFileIfNotSelected(seqFile);
+        seqFile.markAsSelected();
+        atLeastOneSeqFileSelected = true;
+      } else if (!seqFile.unsealed()) {
+        // The seq file lies entirely before the unseq device; remember it as a fallback.
+        previousSeqFile = seqFile;
+      }
+    }
+
+    // If: 1. some seq files are removed by deletion or TTL; 2. the unseq file is loaded; the
+    // unseq file may have larger timestamps and does not overlap with any seqFile.
+    //
+    // For example, previously we have two seqFiles, ranging [0, 100] [200, 300] respectively.
+    // Then the seqFile of [200, 300] is deleted and an unseqFile of [150, 180] is
+    // written/loaded. The unseqFile does not overlap with any seqFile and no seqFile has a
+    // larger timestamp than it, thus the unseqFile cannot select any candidate seqFile through
+    // the loop above.
+    //
+    // In this case, the unseqFile should be merged with the last seqFile if possible.
+    // TODO: let insertionCompaction handle this case
+
+    // If this condition is true, `previousSeqFile` is unnecessary.
+    if (atLeastOneSeqFileSelected || previousSeqFile == null) {
+      return true;
+    }
+
+    // If this condition is true, `previousSeqFile` is necessary but cannot be selected as a
+    // candidate, so the selection should be terminated.
+    if (!previousSeqFile.isValidCandidate) {
+      return false;
+    }
+
+    // select the `previousSeqFile`
+    tmpSplit.addSeqFileIfNotSelected(previousSeqFile);
+    previousSeqFile.markAsSelected();
+    return true;
+  }
+
private TsFileResourceCandidate getLatestSealedSeqFile(
List<TsFileResourceCandidate> seqResourceCandidateList) {
for (int i = seqResourceCandidateList.size() - 1; i >= 0; i--) {
@@ -249,6 +260,7 @@ public class CrossSpaceCompactionCandidate {
}
public static class CrossCompactionTaskResourceSplit {
+
@SuppressWarnings("squid:S1104")
public TsFileResourceCandidate unseqFile;