This is an automated email from the ASF dual-hosted git repository. leirui pushed a commit to branch research/M4-visualization in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit c5fa371b3e2b11aa27af3df06421feec1b540fac Author: Lei Rui <[email protected]> AuthorDate: Fri Jul 7 15:29:49 2023 +0800 fix --- .../file/metadata/statistics/StepRegress.java | 14 +++++++-- .../iotdb/tsfile/read/common/ChunkSuit4CPV.java | 36 +++++++++------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/statistics/StepRegress.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/statistics/StepRegress.java index 3e14571d3d0..9898d145e6f 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/statistics/StepRegress.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/statistics/StepRegress.java @@ -61,7 +61,7 @@ public class StepRegress { private double mean = 0; // mean of intervals private double stdDev = 0; // standard deviation of intervals - private long count = 0; + public long count = 0; private double sumX2 = 0.0; private double sumX1 = 0.0; @@ -409,12 +409,22 @@ public class StepRegress { } } + public int infer(double t, int count) throws IOException { + int pos = (int) Math.round(infer_internal(t)); // starting from 1 + // As double loses precision, although the last point is passed at the phase of learning, + // the inferred pos by rounding may still exceed max position count. + if (pos > count) { + pos--; + } + return pos; + } + /** * @param t input timestamp * @return output the value of the step regression function f(t), which is the estimated position * in the chunk. Pay attention that f(t) starts from (startTime,1), ends at (endTime,count). */ - public double infer(double t) throws IOException { + public double infer_internal(double t) throws IOException { if (segmentKeys.size() == 1) { // TODO DEBUG return 1; } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/ChunkSuit4CPV.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/ChunkSuit4CPV.java index b5791f0ed4e..b640632e113 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/ChunkSuit4CPV.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/ChunkSuit4CPV.java @@ -32,13 +32,11 @@ import org.apache.iotdb.tsfile.file.metadata.statistics.StepRegress; import org.apache.iotdb.tsfile.read.common.IOMonitor2.Operation; import org.apache.iotdb.tsfile.read.reader.page.PageReader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.IOException; public class ChunkSuit4CPV { - private static final Logger M4_CHUNK_METADATA = LoggerFactory.getLogger("M4_CHUNK_METADATA"); + + // private static final Logger M4_CHUNK_METADATA = LoggerFactory.getLogger("M4_CHUNK_METADATA"); private ChunkMetadata chunkMetadata; // fixed info, including version, dataType, stepRegress public int modelPointsCursor = @@ -258,7 +256,10 @@ public class ChunkSuit4CPV { if (TSFileDescriptor.getInstance().getConfig().isUseTimeIndex()) { StepRegress stepRegress = chunkMetadata.getStatistics().getStepRegress(); // infer position starts from 1, so minus 1 here - estimatedPos = (int) Math.round(stepRegress.infer(targetTimestamp)) - 1; + // note get count from global chunkMetadata.getStatistics().getCount(), not local + // statistics.getCount + estimatedPos = + stepRegress.infer(targetTimestamp, chunkMetadata.getStatistics().getCount()) - 1; // search from estimatePos in the timeBuffer to find the closet timestamp equal to or larger // than the given timestamp @@ -374,24 +375,13 @@ public class ChunkSuit4CPV { if (TSFileDescriptor.getInstance().getConfig().isUseTimeIndex()) { StepRegress stepRegress = chunkMetadata.getStatistics().getStepRegress(); // infer position starts from 1, so minus 1 here - estimatedPos = (int) Math.round(stepRegress.infer(targetTimestamp)) - 1; + // note get count from global chunkMetadata.getStatistics().getCount(), not local + // statistics.getCount + estimatedPos = + stepRegress.infer(targetTimestamp, chunkMetadata.getStatistics().getCount()) - 1; // search from estimatePos in the timeBuffer to find the closet timestamp equal to or smaller // than the given timestamp - // TODO debug - try { - long tmp = pageReader.timeBuffer.getLong(estimatedPos * 8); - } catch (Exception e) { - M4_CHUNK_METADATA.info("targetTimestamp=" + targetTimestamp); - M4_CHUNK_METADATA.info("estimatedPos=" + estimatedPos); - M4_CHUNK_METADATA.info("count=" + chunkMetadata.getStatistics().getCount()); - M4_CHUNK_METADATA.info("stepregress segmentKeys=" + stepRegress.getSegmentKeys()); - M4_CHUNK_METADATA.info( - "stepregress segmentIntercepts=" + stepRegress.getSegmentIntercepts()); - M4_CHUNK_METADATA.info("stepregress slope=" + stepRegress.getSlope()); - throw e; - } - if (pageReader.timeBuffer.getLong(estimatedPos * 8) > targetTimestamp) { while (pageReader.timeBuffer.getLong(estimatedPos * 8) > targetTimestamp) { estimatedPos--; @@ -510,8 +500,10 @@ public class ChunkSuit4CPV { if (TSFileDescriptor.getInstance().getConfig().isUseTimeIndex()) { StepRegress stepRegress = chunkMetadata.getStatistics().getStepRegress(); // infer position starts from 1, so minus 1 here - // TODO debug buffer.get(index) - int estimatedPos = (int) Math.round(stepRegress.infer(targetTimestamp)) - 1; + // note get count from global chunkMetadata.getStatistics().getCount(), not local + // statistics.getCount + int estimatedPos = + stepRegress.infer(targetTimestamp, chunkMetadata.getStatistics().getCount()) - 1; // search from estimatePos in the timeBuffer to find the closet timestamp equal to or smaller // than the given timestamp
