This is an automated email from the ASF dual-hosted git repository. xingtanzjr pushed a commit to branch overlap_check_tool in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 83e82391bcf3b95ce7096aa2e2a7e84a16ec311c Author: Jinrui.Zhang <[email protected]> AuthorDate: Mon Aug 7 16:09:17 2023 +0800 complete basic arch --- .../dataregion/compaction/tool/ITimeRange.java | 29 +++++ .../dataregion/compaction/tool/Interval.java | 38 ++++++ .../compaction/tool/ListTimeRangeImpl.java | 31 +++++ .../compaction/tool/OverlapStatisticTool.java | 128 +++++++++++++++++++++ .../compaction/tool/TsFileStatisticReader.java | 57 +++++++++ .../compaction/tool/UnseqSpaceStatistics.java | 30 +++++ 6 files changed, 313 insertions(+) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ITimeRange.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ITimeRange.java new file mode 100644 index 00000000000..9f1d64a81be --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ITimeRange.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.db.storageengine.dataregion.compaction.tool; + +public interface ITimeRange { + + // Add an interval to the current time range object. While adding, the current TimeRange must be maintained so that later overlap checks can be done efficiently. + void addInterval(Interval interval); + + // Check whether the given interval overlaps with the current time range. + boolean isOverlapped(Interval interval); +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/Interval.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/Interval.java new file mode 100644 index 00000000000..7402ff34d20 --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/Interval.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.db.storageengine.dataregion.compaction.tool; + +public class Interval { // Holds the start and end values of one interval; both are set once in the constructor and read through getters. + private long start; + private long end; + + public Interval(long start, long end) { + this.start = start; + this.end = end; + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ListTimeRangeImpl.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ListTimeRangeImpl.java new file mode 100644 index 00000000000..6021b1a6288 --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ListTimeRangeImpl.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.db.storageengine.dataregion.compaction.tool; + +public class ListTimeRangeImpl implements ITimeRange { // Stub implementation: addInterval is empty and isOverlapped always returns false. + + @Override + public void addInterval(Interval interval) {} + + @Override + public boolean isOverlapped(Interval interval) { + return false; + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/OverlapStatisticTool.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/OverlapStatisticTool.java new file mode 100644 index 00000000000..41f8944c5dd --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/OverlapStatisticTool.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.db.storageengine.dataregion.compaction.tool; + +import org.apache.iotdb.db.storageengine.dataregion.compaction.tool.TsFileStatisticReader.ChunkGroupStatistics; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; + +import java.io.IOException; +import java.util.List; + +public class OverlapStatisticTool { + private List<Long> timePartitions; + private long seqFileCount; + + private long processedTimePartitionCount; + private long processedSeqFileCount; + + public static void main(String args[]) { + + // 1. Parse the arguments and obtain the data directory paths from the input + + // 2. Run the computation + OverlapStatisticTool tool = new OverlapStatisticTool(); + tool.process(null); // NOTE(review): dataDirs is passed as null here + } + + public void process(List<String> dataDirs) { + // 0. Pre-processing + processDataDirs(); + + // 1. Build the final result set + OverlapStatistic statistic = new OverlapStatistic(); + + // 2. Iterate based on the time partition information + for (Long timePartition : timePartitions) { // NOTE(review): no visible code assigns timePartitions + OverlapStatistic partialRet = processOneTimePartiton(timePartition, dataDirs); + // Merge this time partition's result set into the final result set + + // Update and print the progress + + } + } + + private void updateProcessAndPrint(OverlapStatistic partialRet) { + processedTimePartitionCount += 1; + processedSeqFileCount += partialRet.totalFiles; + + // Print the progress + } + + private void processDataDirs() { + // 1. Traverse all time partitions and build timePartitions + + // 2. Count the total number of sequence files + } + + private OverlapStatistic processOneTimePartiton(long timePartition, List<String> dataDirs) { + // 1. Based on timePartition, get the unsequence files under all data directories and build UnseqSpaceStatistics + UnseqSpaceStatistics unseqSpaceStatistics; // NOTE(review): declared but never assigned or used in this method + + // 2. 
Traverse all sequence files in this time partition, get the information of every chunk, run the overlap check on each in turn, and update the statistics + OverlapStatistic overlapStatistic = new OverlapStatistic(); + List<String> seqFiles = getFilesInOnePartition(timePartition, dataDirs, true); + for (String seqFile : seqFiles) { + try (TsFileStatisticReader reader = new TsFileStatisticReader(seqFile)) { + // Collect the statistics of the sequence file and update them into overlapStatistic + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + return overlapStatistic; + } + + private UnseqSpaceStatistics buildUnseqSpaceStatistics( + long timePartition, List<String> dataDirs) { + UnseqSpaceStatistics unseqSpaceStatistics = new UnseqSpaceStatistics(); + List<String> unseqFiles = getFilesInOnePartition(timePartition, dataDirs, false); + for (String unseqFile : unseqFiles) { + try (TsFileStatisticReader reader = new TsFileStatisticReader(unseqFile)) { + List<ChunkGroupStatistics> chunkGroupStatisticsList = reader.getChunkGroupStatistics(); + for (ChunkGroupStatistics statistics : chunkGroupStatisticsList) { + for (ChunkMetadata chunkMetadata : statistics.getChunkMetadataList()) { + unseqSpaceStatistics.update( + statistics.getDeviceID(), + chunkMetadata.getMeasurementUid(), + new Interval(chunkMetadata.getStartTime(), chunkMetadata.getEndTime())); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return unseqSpaceStatistics; + } + + private List<String> getFilesInOnePartition( + long timePartition, List<String> dataDirs, boolean isSeq) { + return null; // NOTE(review): placeholder; both callers iterate over the returned list + } + + private static class OverlapStatistic { + private long totalFiles; + private long totalChunkGroups; + private long totalChunks; + + private long overlappedFiles; + private long overlappedChunkGroups; + private long overlappedChunks; + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/TsFileStatisticReader.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/TsFileStatisticReader.java new file mode 
100644 index 00000000000..f7cbc96b625 --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/TsFileStatisticReader.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.db.storageengine.dataregion.compaction.tool; + +import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetadata; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +public class TsFileStatisticReader implements Closeable { // Stub reader: the constructor and close() are empty and getChunkGroupStatistics() returns null. + + public TsFileStatisticReader(String filePath) {} + + public List<ChunkGroupStatistics> getChunkGroupStatistics() { + return null; + } + + @Override + public void close() throws IOException {} + + public static class ChunkGroupStatistics { // Groups a device ID with a ChunkGroupMetadata and a list of ChunkMetadata; no visible code assigns these fields. + private String deviceID; + private ChunkGroupMetadata chunkGroupMetadata; + private List<ChunkMetadata> chunkMetadataList; + + public String getDeviceID() { + return deviceID; + } + + public ChunkGroupMetadata getChunkGroupMetadata() { + return chunkGroupMetadata; + } + + public List<ChunkMetadata> getChunkMetadataList() { + return chunkMetadataList; + } + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/UnseqSpaceStatistics.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/UnseqSpaceStatistics.java new file mode 100644 index 00000000000..116f9be3808 --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/UnseqSpaceStatistics.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.db.storageengine.dataregion.compaction.tool; + +import java.util.Map; + +public class UnseqSpaceStatistics { + // device -> series -> time range + private Map<String, Map<String, ITimeRange>> deviceStatisticMap; + + // Update the time range of a given series of a given device (not implemented yet: the body is empty) + public void update(String device, String measurementUID, Interval interval) {} +}
