This is an automated email from the ASF dual-hosted git repository.
xingtanzjr pushed a commit to branch overlap_check_tool
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/overlap_check_tool by this
push:
new 11d63c88280 complete basic arch
11d63c88280 is described below
commit 11d63c882805f671bba3922b2f6e8b4b87b89ee0
Author: Jinrui.Zhang <[email protected]>
AuthorDate: Mon Aug 7 16:09:17 2023 +0800
complete basic arch
---
.../dataregion/compaction/tool/ITimeRange.java | 29 +++++
.../dataregion/compaction/tool/Interval.java | 38 +++++++
.../compaction/tool/ListTimeRangeImpl.java | 31 ++++++
.../compaction/tool/OverlapStatisticTool.java | 119 +++++++++++++++++++++
.../compaction/tool/TsFileStatisticReader.java | 57 ++++++++++
.../compaction/tool/UnseqSpaceStatistics.java | 30 ++++++
6 files changed, 304 insertions(+)
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ITimeRange.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ITimeRange.java
new file mode 100644
index 00000000000..9f1d64a81be
--- /dev/null
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ITimeRange.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.storageengine.dataregion.compaction.tool;
+
+public interface ITimeRange { // Contract for an object that accumulates intervals and answers overlap queries against them.
+
+ // Adds an interval to this time range. While adding, the implementation is expected to maintain its internal state so that later overlap checks can be served efficiently.
+ void addInterval(Interval interval);
+
+ // Returns whether the given interval overlaps this time range. NOTE(review): whether intervals that merely touch at an endpoint count as overlapping is not specified here.
+ boolean isOverlapped(Interval interval);
+}
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/Interval.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/Interval.java
new file mode 100644
index 00000000000..7402ff34d20
--- /dev/null
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/Interval.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.storageengine.dataregion.compaction.tool;
+
+public class Interval { // Holds a pair of long values, start and end, describing one time span.
+ private long start; // written only by the constructor in this class
+ private long end; // written only by the constructor in this class
+
+ public Interval(long start, long end) { // stores both bounds as given; no start <= end check is performed. NOTE(review): inclusive vs. exclusive bounds are not stated - confirm
+ this.start = start;
+ this.end = end;
+ }
+
+ public long getStart() { // returns the start value passed to the constructor
+ return start;
+ }
+
+ public long getEnd() { // returns the end value passed to the constructor
+ return end;
+ }
+}
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ListTimeRangeImpl.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ListTimeRangeImpl.java
new file mode 100644
index 00000000000..6021b1a6288
--- /dev/null
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/ListTimeRangeImpl.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.storageengine.dataregion.compaction.tool;
+
+public class ListTimeRangeImpl implements ITimeRange { // Placeholder ITimeRange implementation: both methods are stubs in this commit.
+
+ @Override
+ public void addInterval(Interval interval) {} // stub: the interval is discarded, nothing is stored
+
+ @Override
+ public boolean isOverlapped(Interval interval) { // stub: reports "no overlap" for every input
+ return false;
+ }
+}
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/OverlapStatisticTool.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/OverlapStatisticTool.java
new file mode 100644
index 00000000000..78ead1d82d0
--- /dev/null
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/OverlapStatisticTool.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.storageengine.dataregion.compaction.tool;
+
+import
org.apache.iotdb.db.storageengine.dataregion.compaction.tool.TsFileStatisticReader.ChunkGroupStatistics;
+import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
+
+import java.io.IOException;
+import java.util.List;
+
+public class OverlapStatisticTool { // Skeleton of the overlap statistic tool: per the step comments below it is meant to check sequence-file chunks against unsequence-file time ranges, but most steps are not implemented yet.
+ private List<Long> timePartitions; // NOTE(review): never assigned anywhere in this class
+ private long seqFileCount; // NOTE(review): declared but neither read nor written in this class
+
+ private long processedTimePartitionCount; // incremented by one per call of updateProcessAndPrint
+ private long processedSeqFileCount; // accumulates partialRet.totalFiles in updateProcessAndPrint
+
+ public static void main(String args[]) { // entry point; args is not read yet
+
+ // 1. Parse the arguments and obtain the data directory paths from the input (not implemented yet)
+
+ // 2. Run the computation
+ OverlapStatisticTool tool = new OverlapStatisticTool();
+ tool.process(null); // null is passed as dataDirs; the argument parsing of step 1 is still missing
+ }
+
+ public void process(List<String> dataDirs) { // runs pre-processing, then handles each time partition in turn; dataDirs is only forwarded
+ // 0. Pre-processing
+ processDataDirs();
+
+ // 1. Build the final result set. NOTE(review): statistic is created but not used afterwards
+ OverlapStatistic statistic = new OverlapStatistic();
+
+ // 2. Process each time partition. NOTE(review): timePartitions is never assigned in this class, so this loop throws NullPointerException as written
+ for (Long timePartition : timePartitions) {
+ OverlapStatistic partialRet = processOneTimePartiton(timePartition,
dataDirs);
+ // Merge this time partition's result into the final result set (not implemented yet; partialRet is unused)
+
+ // Update and print the progress (not implemented yet)
+
+ }
+ }
+
+ private void updateProcessAndPrint(OverlapStatistic partialRet) { // NOTE(review): not called anywhere in this class yet
+ processedTimePartitionCount += 1;
+ processedSeqFileCount += partialRet.totalFiles;
+
+ // Print the progress (not implemented yet)
+ }
+
+ private void processDataDirs() { // empty stub: only the planned steps are listed
+ // 1. Traverse all time partitions and build timePartitions
+
+ // 2. Count the total number of sequence files
+ }
+
+ private OverlapStatistic processOneTimePartiton(long timePartition,
List<String> dataDirs) { // currently returns a fresh OverlapStatistic with all counters at zero. NOTE(review): the method name is misspelled ("Partiton")
+ // 1. For this timePartition, collect the unsequence files under all data directories and build UnseqSpaceStatistics
+ UnseqSpaceStatistics unseqSpaceStatistics; // declared but not assigned; buildUnseqSpaceStatistics is not called here yet
+
+ // 2. Traverse all sequence files of this time partition, read the information of every chunk, run the overlap check on each in turn and update the statistics
+ OverlapStatistic overlapStatistic = new OverlapStatistic();
+
+ return overlapStatistic;
+ }
+
+ private UnseqSpaceStatistics buildUnseqSpaceStatistics(
+ long timePartition, List<String> dataDirs) { // feeds device ID, measurement UID and start/end time of every chunk of the listed files into a new UnseqSpaceStatistics
+ UnseqSpaceStatistics unseqSpaceStatistics = new UnseqSpaceStatistics();
+ List<String> unseqFiles = getFilesInOnePartition(timePartition, dataDirs); // NOTE(review): the helper currently returns null, so the loop below throws NullPointerException as written
+ for (String unseqFile : unseqFiles) {
+ try (TsFileStatisticReader reader = new
TsFileStatisticReader(unseqFile)) { // try-with-resources closes the reader after each file
+ List<ChunkGroupStatistics> chunkGroupStatisticsList =
reader.getChunkGroupStatistics();
+ for (ChunkGroupStatistics statistics : chunkGroupStatisticsList) {
+ for (ChunkMetadata chunkMetadata :
statistics.getChunkMetadataList()) {
+ unseqSpaceStatistics.update(
+ statistics.getDeviceID(),
+ chunkMetadata.getMeasurementUid(),
+ new Interval(chunkMetadata.getStartTime(),
chunkMetadata.getEndTime()));
+ }
+ }
+ } catch (IOException e) { // rethrown unchecked with the original exception kept as the cause
+ throw new RuntimeException(e);
+ }
+ }
+ return unseqSpaceStatistics;
+ }
+
+ private List<String> getFilesInOnePartition(long timePartition, List<String>
dataDirs) { // stub: ignores both arguments and always returns null
+ return null;
+ }
+
+ private static class OverlapStatistic { // plain counter holder; every field keeps its default value 0 until assigned
+ private long totalFiles;
+ private long totalChunkGroups;
+ private long totalChunks;
+
+ private long overlappedFiles;
+ private long overlappedChunkGroups;
+ private long overlappedChunks;
+ }
+}
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/TsFileStatisticReader.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/TsFileStatisticReader.java
new file mode 100644
index 00000000000..f7cbc96b625
--- /dev/null
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/TsFileStatisticReader.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.storageengine.dataregion.compaction.tool;
+
+import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetadata;
+import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.List;
+
+public class TsFileStatisticReader implements Closeable { // Closeable reader skeleton that is meant to expose chunk-group statistics for one file path; nothing is actually read in this commit.
+
+ public TsFileStatisticReader(String filePath) {} // stub: filePath is ignored and nothing is opened
+
+ public List<ChunkGroupStatistics> getChunkGroupStatistics() { // stub: always returns null, so callers iterating the result will fail
+ return null;
+ }
+
+ @Override
+ public void close() throws IOException {} // stub: there is nothing to release yet
+
+ public static class ChunkGroupStatistics { // getter-only holder; no constructor or setter assigns these fields in the visible code, so they stay null
+ private String deviceID;
+ private ChunkGroupMetadata chunkGroupMetadata;
+ private List<ChunkMetadata> chunkMetadataList;
+
+ public String getDeviceID() { // returns the deviceID field as-is
+ return deviceID;
+ }
+
+ public ChunkGroupMetadata getChunkGroupMetadata() { // returns the chunkGroupMetadata field as-is
+ return chunkGroupMetadata;
+ }
+
+ public List<ChunkMetadata> getChunkMetadataList() { // returns the internal list reference directly, without copying
+ return chunkMetadataList;
+ }
+ }
+}
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/UnseqSpaceStatistics.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/UnseqSpaceStatistics.java
new file mode 100644
index 00000000000..116f9be3808
--- /dev/null
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/tool/UnseqSpaceStatistics.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.storageengine.dataregion.compaction.tool;
+
+import java.util.Map;
+
+public class UnseqSpaceStatistics { // Holder for per-device, per-series time ranges; the update logic is not implemented in this commit.
+ // device -> series -> time range. NOTE(review): this map is never initialized in the visible code
+ private Map<String, Map<String, ITimeRange>> deviceStatisticMap;
+
+ // Updates the time range of one series of one device (not implemented yet: the body is empty and all arguments are ignored)
+ public void update(String device, String measurementUID, Interval interval)
{}
+}