This is an automated email from the ASF dual-hosted git repository. qiaojialin pushed a commit to branch add_delete_query_doc in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git
commit ee3266666f8590eb27656aab3b524b76298cb546 Author: qiaojialin <[email protected]> AuthorDate: Fri Apr 3 16:40:40 2020 +0800 add docs for deletion handle in query --- docs/SystemDesign/5-DataQuery/1-DataQuery.md | 3 ++ .../5-DataQuery/8-ModificationHandle.md | 63 ++++++++++++++++++++++ docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md | 3 ++ .../5-DataQuery/8-ModificationHandle.md | 63 ++++++++++++++++++++++ .../iotdb/db/query/reader/series/SeriesReader.java | 4 +- .../org/apache/iotdb/db/utils/FileLoaderUtils.java | 21 ++++---- site/src/main/.vuepress/config.js | 2 + 7 files changed, 146 insertions(+), 13 deletions(-) diff --git a/docs/SystemDesign/5-DataQuery/1-DataQuery.md b/docs/SystemDesign/5-DataQuery/1-DataQuery.md index 37222ff..9325e45 100644 --- a/docs/SystemDesign/5-DataQuery/1-DataQuery.md +++ b/docs/SystemDesign/5-DataQuery/1-DataQuery.md @@ -28,6 +28,7 @@ There are several types of data queries * Downsampling query * Single point supplementary null query * Latest data query +* Align by device query In order to achieve the above kinds of queries, a basic query component for a single time series is designed in the IoTDB query engine, and on this basis, various query functions are implemented. 
@@ -38,3 +39,5 @@ In order to achieve the above kinds of queries, a basic query component for a si * [Aggregate query](/SystemDesign/5-DataQuery/4-AggregationQuery.html) * [Downsampling query](/SystemDesign/5-DataQuery/5-GroupByQuery.html) * [Recent timestamp query](/SystemDesign/5-DataQuery/6-LastQuery.html) +* [Align by device query](/SystemDesign/5-DataQuery/7-AlignByDeviceQuery.html) +* [Modification handle](/SystemDesign/5-DataQuery/8-ModificationHandle.html) \ No newline at end of file diff --git a/docs/SystemDesign/5-DataQuery/8-ModificationHandle.md b/docs/SystemDesign/5-DataQuery/8-ModificationHandle.md new file mode 100644 index 0000000..1166250 --- /dev/null +++ b/docs/SystemDesign/5-DataQuery/8-ModificationHandle.md @@ -0,0 +1,63 @@ +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +--> + +# Modification handling in query + +For disk data, a deletion is only recorded in a mods file; the data itself is not really deleted. Therefore, we need to take these modifications into account during queries. + +Each timeseries is treated independently in the query process. For each timeseries, there are 5 levels: TsFileResource -> TimeseriesMetadata -> ChunkMetadata -> IPageReader -> BatchData + +Query resource: a TsFileResource and its mods file, if one exists. 
If a TsFile is affected by a deletion, a modification log entry will be recorded in its mods file. Each entry contains 3 parts: the path of the deleted timeseries, the deleted time (the largest timestamp of the deleted range), and the version of the deletion + + + +* TsFileResource -> TimeseriesMetadata + +``` +// Mark the statistics in TimeseriesMetadata as unusable if the timeseries has any modifications +FileLoaderUtils.loadTimeSeriesMetadata() +``` + +* TimeseriesMetadata -> List\<ChunkMetadata\> + +``` +// For each ChunkMetadata, find the largest timestamp among all modifications whose version is larger than the chunk's version, and set it as the deleted time (deleteAt) of the ChunkMetadata. +// Mark the statistics in ChunkMetadata as unusable if it is affected by deletion +FileLoaderUtils.loadChunkMetadataList() +``` + +E.g., the loaded ChunkMetadata list is: + + + +* ChunkMetadata -> List\<IPageReader\> + +``` +// Skip fully deleted pages, set deleteAt in the PageReader, and mark the page statistics as unusable if the page is affected by deletion +FileLoaderUtils.loadPageReaderList() +``` + +* IPageReader -> BatchData + +``` +// For disk pages, skip the data points that are deleted or filtered out. For memory data, skip the data points that are filtered out. 
+IPageReader.getAllSatisfiedPageData() +``` \ No newline at end of file diff --git a/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md b/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md index 0ace265..47cbc19 100644 --- a/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md +++ b/docs/zh/SystemDesign/5-DataQuery/1-DataQuery.md @@ -28,6 +28,7 @@ * 降采样查询 * 单点补空值查询 * 最新数据查询 +* 按设备对齐查询 为了实现以上几种查询,IoTDB 查询引擎中设计了针对单个时间序列的基础查询组件,在此基础上,实现了多种查询功能。 @@ -38,3 +39,5 @@ * [聚合查询](/zh/SystemDesign/5-DataQuery/4-AggregationQuery.html) * [降采样查询](/zh/SystemDesign/5-DataQuery/5-GroupByQuery.html) * [最近时间戳查询](/zh/SystemDesign/5-DataQuery/6-LastQuery.html) +* [按设备对齐查询](/zh/SystemDesign/5-DataQuery/7-AlignByDeviceQuery.html) +* [查询中的数据删改处理](/zh/SystemDesign/5-DataQuery/8-ModificationHandle.html) diff --git a/docs/zh/SystemDesign/5-DataQuery/8-ModificationHandle.md b/docs/zh/SystemDesign/5-DataQuery/8-ModificationHandle.md new file mode 100644 index 0000000..ce4ff7b --- /dev/null +++ b/docs/zh/SystemDesign/5-DataQuery/8-ModificationHandle.md @@ -0,0 +1,63 @@ +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+ +--> + +# 查询中的数据修改处理 + +数据删除操作对磁盘数据只记录了 mods 文件,并未真正执行删除逻辑,因此查询时需要考虑数据删除的逻辑。 + +查询时每个时间序列会单独处理。针对一个时间序列,由大到小有 5 个层次:TsFileResource -> TimeseriesMetadata -> ChunkMetadata -> IPageReader -> BatchData + +查询资源:TsFileResource 以及可能存在的 mods 文件,如果一个文件中有数据被删除了,将删除操作记录到 mods 文件中。记录三列:删除的时间序列,删除范围的最大时间点,删除操作对应的版本。 + + + +* TsFileResource -> TimeseriesMetadata + +``` +// 只要这个时间序列有对应的 modification,就标记 TimeseriesMetadata 中的统计信息不可用 +FileLoaderUtils.loadTimeSeriesMetadata() +``` + +* TimeseriesMetadata -> List\<ChunkMetadata\> + +``` +// 对于每个 ChunkMetadata,找到比其 version 大的所有 modification 中最大时间戳, 设置到 ChunkMetadata 的 deleteAt 中,并标记 统计信息不可用 +FileLoaderUtils.loadChunkMetadataList() +``` + +对于以上示例,读取到的 ChunkMetadataList 为 + + + +* ChunkMetadata -> List\<IPageReader\> + +``` +// 跳过被完全删除的 Page,将 deleteAt 设置到 PageReader 里,将数据被部分删除的 page 标记统计信息不可用 +FileLoaderUtils.loadPageReaderList() +``` + +* IPageReader -> BatchData + +``` +// 对于磁盘数据,跳过被删除的和过滤掉的,对于内存数据,跳过被过滤掉的 +IPageReader.getAllSatisfiedPageData() +``` + diff --git a/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java b/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java index 1586d6f..5590615 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java +++ b/server/src/main/java/org/apache/iotdb/db/query/reader/series/SeriesReader.java @@ -241,7 +241,7 @@ class SeriesReader { } private void unpackOneTimeSeriesMetadata(TimeseriesMetadata timeSeriesMetadata) throws IOException { - cachedChunkMetadata.addAll(FileLoaderUtils.loadChunkMetadata(timeSeriesMetadata)); + cachedChunkMetadata.addAll(FileLoaderUtils.loadChunkMetadataList(timeSeriesMetadata)); } boolean isChunkOverlapped() throws IOException { @@ -338,7 +338,7 @@ class SeriesReader { } private void unpackOneChunkMetaData(ChunkMetadata chunkMetaData) throws IOException { - FileLoaderUtils.loadPageReader(chunkMetaData, timeFilter) + FileLoaderUtils.loadPageReaderList(chunkMetaData, timeFilter) 
.forEach(pageReader -> cachedPageReaders.add(new VersionPageReader(chunkMetaData.getVersion(), pageReader))); } diff --git a/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java b/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java index 9f9cb13..8884a7d 100644 --- a/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java +++ b/server/src/main/java/org/apache/iotdb/db/utils/FileLoaderUtils.java @@ -90,7 +90,7 @@ public class FileLoaderUtils { * @param allSensors measurements queried at the same time of this device */ public static TimeseriesMetadata loadTimeSeriesMetadata(TsFileResource resource, Path seriesPath, - QueryContext context, Filter timeFilter, Set<String> allSensors) throws IOException { + QueryContext context, Filter timeFilter, Set<String> allSensors) throws IOException { TimeseriesMetadata timeSeriesMetadata; if (resource.isClosed()) { timeSeriesMetadata = TimeSeriesMetadataCache.getInstance() @@ -129,7 +129,8 @@ public class FileLoaderUtils { * load all chunk metadata of one time series in one file. * @param timeSeriesMetadata the corresponding TimeSeriesMetadata in that file. 
*/ - public static List<ChunkMetadata> loadChunkMetadata(TimeseriesMetadata timeSeriesMetadata) throws IOException { + public static List<ChunkMetadata> loadChunkMetadataList(TimeseriesMetadata timeSeriesMetadata) + throws IOException { return timeSeriesMetadata.loadChunkMetadataList(); } @@ -139,27 +140,25 @@ public class FileLoaderUtils { * @param chunkMetaData the corresponding chunk metadata * @param timeFilter it should be a TimeFilter instead of a ValueFilter */ - public static List<IPageReader> loadPageReader(ChunkMetadata chunkMetaData, Filter timeFilter) throws IOException { - return initChunkReader(chunkMetaData, timeFilter).loadPageReaderList(); - } - - private static IChunkReader initChunkReader(ChunkMetadata metaData, Filter timeFilter) throws IOException { - if (metaData == null) { + public static List<IPageReader> loadPageReaderList(ChunkMetadata chunkMetaData, Filter timeFilter) + throws IOException { + if (chunkMetaData == null) { throw new IOException("Can't init null chunkMeta"); } IChunkReader chunkReader; - IChunkLoader chunkLoader = metaData.getChunkLoader(); + IChunkLoader chunkLoader = chunkMetaData.getChunkLoader(); if (chunkLoader instanceof MemChunkLoader) { MemChunkLoader memChunkLoader = (MemChunkLoader) chunkLoader; chunkReader = new MemChunkReader(memChunkLoader.getChunk(), timeFilter); } else { - Chunk chunk = chunkLoader.loadChunk(metaData); + Chunk chunk = chunkLoader.loadChunk(chunkMetaData); chunkReader = new ChunkReader(chunk, timeFilter); chunkReader.hasNextSatisfiedPage(); } - return chunkReader; + return chunkReader.loadPageReaderList(); } + /** * load all ChunkMetadatas belong to the seriesPath */ diff --git a/site/src/main/.vuepress/config.js b/site/src/main/.vuepress/config.js index d0658ef..869536f 100644 --- a/site/src/main/.vuepress/config.js +++ b/site/src/main/.vuepress/config.js @@ -446,6 +446,7 @@ var config = { '5-DataQuery/5-GroupByQuery', '5-DataQuery/6-LastQuery', '5-DataQuery/7-AlignByDeviceQuery', + 
'5-DataQuery/8-ModificationHandle', ] }, { @@ -860,6 +861,7 @@ var config = { '5-DataQuery/5-GroupByQuery', '5-DataQuery/6-LastQuery', '5-DataQuery/7-AlignByDeviceQuery', + '5-DataQuery/8-ModificationHandle', ] }, {
