This is an automated email from the ASF dual-hosted git repository. jackietien pushed a commit to branch OnlyUnpackNeeded in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 6e1650fc47f5c65b6137e2f009e43f03b849512c Author: JackieTien97 <[email protected]> AuthorDate: Wed Dec 6 18:08:39 2023 +0800 Opt for only unpacking tsfile when it's really needed --- .../execution/operator/source/SeriesScanUtil.java | 96 +++++++++++++++++++--- 1 file changed, 83 insertions(+), 13 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/SeriesScanUtil.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/SeriesScanUtil.java index 77f1c100f4d..607d6bffa0c 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/SeriesScanUtil.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/SeriesScanUtil.java @@ -985,19 +985,61 @@ public class SeriesScanUtil { * approach is likely to be ubiquitous, but it keeps the system running smoothly */ @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning - protected void tryToUnpackAllOverlappedFilesToTimeSeriesMetadata() throws IOException { - /* - * Fill sequence TimeSeriesMetadata List until it is not empty - */ - while (seqTimeSeriesMetadata.isEmpty() && orderUtils.hasNextSeqResource()) { - unpackSeqTsFileResource(); - } - - /* - * Fill unSequence TimeSeriesMetadata Priority Queue until it is not empty - */ - while (unSeqTimeSeriesMetadata.isEmpty() && orderUtils.hasNextUnseqResource()) { - unpackUnseqTsFileResource(); + private void tryToUnpackAllOverlappedFilesToTimeSeriesMetadata() throws IOException { + + // we try to unpack tsfile which we really need instead of unpacking at least one seq and one + // unseq timeseries metadata each time + // in some cases that have a limit clause, if the seq and unseq timeseries metadata are not + // overlapped, we can save one disk IO(if cache missed). 
+ while (seqTimeSeriesMetadata.isEmpty() || unSeqTimeSeriesMetadata.isEmpty()) { + + if (!seqTimeSeriesMetadata + .isEmpty()) { // already unpacked one seq tsfile, we need to judge whether we still need to + // unpack the unseq tsfile + if (!orderUtils.hasNextUnseqResource() + || orderUtils.isOverlapped( + seqTimeSeriesMetadata.get(0).getStatistics(), + orderUtils.getNextUnseqFileResource(false))) { + break; + } else { + // unpack the unseq tsfile only if it's overlapped with the first seqTimeSeriesMetadata + unpackUnseqTsFileResource(); + } + } else if (!unSeqTimeSeriesMetadata + .isEmpty()) { // already unpacked one unseq tsfile, we need to judge whether we still need + // to unpack the seq tsfile + if (!orderUtils.hasNextSeqResource() + || orderUtils.isOverlapped( + unSeqTimeSeriesMetadata.peek().getStatistics(), + orderUtils.getNextSeqFileResource(false))) { + break; + } else { + // unpack the seq tsfile only if it's overlapped with the first unseqTimeSeriesMetadata + unpackSeqTsFileResource(); + } + } else { // we haven't got one seqTimeSeriesMetadata or unseqTimeSeriesMetadata + if (!orderUtils.hasNextSeqResource() && !orderUtils.hasNextUnseqResource()) { + // if there are no more tsfiles, we just break + break; + } else if (!orderUtils.hasNextUnseqResource()) { + // only has seq tsfiles + unpackSeqTsFileResource(); + } else if (!orderUtils.hasNextSeqResource()) { + // only has unseq tsfiles + unpackUnseqTsFileResource(); + } else { + // we have both seq and unseq tsfiles, we need to decide which to firstly unpack + // if it's asc, we unpack tsfile which has the minimum start time + // if it's desc, 
we unpack tsfile which has the maximum end time + if (orderUtils.isTakeSeqAsFirst( + orderUtils.getNextSeqFileResource(false), + orderUtils.getNextUnseqFileResource(false))) { + unpackSeqTsFileResource(); + } else { + unpackUnseqTsFileResource(); + } + } + } } /* @@ -1219,6 +1261,8 @@ public class SeriesScanUtil { boolean isOverlapped(long time, TsFileResource right); + boolean isOverlapped(Statistics<? extends Object> left, TsFileResource right); + <T> Comparator<T> comparingLong(ToLongFunction<? super T> keyExtractor); long getCurrentEndPoint(long time, Statistics<? extends Object> statistics); @@ -1232,6 +1276,8 @@ public class SeriesScanUtil { boolean isTakeSeqAsFirst( Statistics<? extends Object> seqStatistics, Statistics<? extends Object> unseqStatistics); + boolean isTakeSeqAsFirst(TsFileResource seqTsFileResource, TsFileResource unseqTsFileResource); + boolean getAscending(); boolean hasNextSeqResource(); @@ -1282,6 +1328,11 @@ public class SeriesScanUtil { return time <= right.getEndTime(seriesPath.getDevice()); } + @Override + public boolean isOverlapped(Statistics<?> left, TsFileResource right) { + return left.getStartTime() <= right.getEndTime(seriesPath.getDevice()); + } + @Override public <T> Comparator<T> comparingLong(ToLongFunction<? 
super T> keyExtractor) { Objects.requireNonNull(keyExtractor); @@ -1311,6 +1362,13 @@ public class SeriesScanUtil { return seqStatistics.getEndTime() > unseqStatistics.getEndTime(); } + @Override + public boolean isTakeSeqAsFirst( + TsFileResource seqTsFileResource, TsFileResource unseqTsFileResource) { + String deviceId = seriesPath.getDevice(); + return seqTsFileResource.getEndTime(deviceId) > unseqTsFileResource.getEndTime(deviceId); + } + @Override public boolean getAscending() { return false; @@ -1405,6 +1463,11 @@ public class SeriesScanUtil { return time >= right.getStartTime(seriesPath.getDevice()); } + @Override + public boolean isOverlapped(Statistics<?> left, TsFileResource right) { + return left.getEndTime() >= right.getStartTime(seriesPath.getDevice()); + } + @Override public <T> Comparator<T> comparingLong(ToLongFunction<? super T> keyExtractor) { Objects.requireNonNull(keyExtractor); @@ -1434,6 +1497,13 @@ public class SeriesScanUtil { return seqStatistics.getStartTime() < unseqStatistics.getStartTime(); } + @Override + public boolean isTakeSeqAsFirst( + TsFileResource seqTsFileResource, TsFileResource unseqTsFileResource) { + String deviceId = seriesPath.getDevice(); + return seqTsFileResource.getStartTime(deviceId) < unseqTsFileResource.getStartTime(deviceId); + } + @Override public boolean getAscending() { return true;
