This is an automated email from the ASF dual-hosted git repository. sunzesong pushed a commit to branch experimental/index in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit a92ee19ade6b9ed2d628ccdec86d6b74660b87ab Author: samperson1997 <[email protected]> AuthorDate: Wed Nov 10 13:56:51 2021 +0800 sepersate MetadataIndexTree --- .../{test1832 => test1835}/TsFileSketchTool.java | 8 +- .../TsFileSketchToolV2.java} | 195 ++++++++++----------- .../apache/iotdb/tsfile/test1835/TsFileWrite.java | 110 ++++++++++++ .../file/metadata/MetadataIndexConstructor.java | 42 +++-- .../tsfile/file/metadata/TsFileMetadataV2.java | 126 +++++++++++++ .../fileSystem/fsFactory/LocalFSFactory.java | 20 +-- .../iotdb/tsfile/read/TsFileSequenceReader.java | 40 +++++ .../apache/iotdb/tsfile/write/TsFileWriter.java | 2 +- .../iotdb/tsfile/write/writer/TsFileIOWriter.java | 97 +++++++++- .../iotdb/tsfile/utils/FilePathUtilsTest.java | 4 +- 10 files changed, 500 insertions(+), 144 deletions(-) diff --git a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1832/TsFileSketchTool.java b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileSketchTool.java similarity index 98% copy from example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1832/TsFileSketchTool.java copy to example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileSketchTool.java index ead2726..71a6aa5 100644 --- a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1832/TsFileSketchTool.java +++ b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileSketchTool.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iotdb.tsfile.test1832; +package org.apache.iotdb.tsfile.test1835; import org.apache.iotdb.tsfile.common.conf.TSFileConfig; import org.apache.iotdb.tsfile.file.MetaMarker; @@ -56,10 +56,8 @@ public class TsFileSketchTool { private String splitStr; // for split different part of TsFile public static void main(String[] args) throws IOException { - Pair<String, String> fileNames = checkArgs(args); - String filename = - "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/0/1832/test5.tsfile"; - String outFile = "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/0/1832/1.txt"; + String filename = "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/1/1/test1.tsfile"; + String outFile = "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/1/1/test1.txt"; new TsFileSketchTool(filename, outFile).run(); } diff --git a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1832/TsFileSketchTool.java b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileSketchToolV2.java similarity index 77% rename from example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1832/TsFileSketchTool.java rename to example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileSketchToolV2.java index ead2726..869435f 100644 --- a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1832/TsFileSketchTool.java +++ b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileSketchToolV2.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iotdb.tsfile.test1832; +package org.apache.iotdb.tsfile.test1835; import org.apache.iotdb.tsfile.common.conf.TSFileConfig; import org.apache.iotdb.tsfile.file.MetaMarker; @@ -29,51 +29,58 @@ import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry; import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; -import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata; +import org.apache.iotdb.tsfile.file.metadata.TsFileMetadataV2; import org.apache.iotdb.tsfile.file.metadata.enums.MetadataIndexNodeType; import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer; import org.apache.iotdb.tsfile.read.TsFileSequenceReader; import org.apache.iotdb.tsfile.read.common.Chunk; import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.read.reader.TsFileInput; import org.apache.iotdb.tsfile.utils.BloomFilter; import org.apache.iotdb.tsfile.utils.Pair; +import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; -import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.TreeMap; -public class TsFileSketchTool { +public class TsFileSketchToolV2 { private String filename; + private String indexFileName; private PrintWriter pw; private TsFileSketchToolReader reader; + private TsFileSketchToolReader indexReader; private String splitStr; // for split different part of TsFile public static void main(String[] args) throws IOException { - Pair<String, String> fileNames = checkArgs(args); - String filename = - "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/0/1832/test5.tsfile"; - String outFile = "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/0/1832/1.txt"; - new TsFileSketchTool(filename, outFile).run(); + String filename = "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/1/1/test0.tsfile"; + String outFile = "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/1/1/test0.txt"; + String indexFileName = + "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/1/1/test0.tsfile.index"; + + new TsFileSketchToolV2(filename, indexFileName, outFile).run(); } /** * construct TsFileSketchTool * * @param filename input file path + * @param indexFileName index file path * @param outFile output file path */ - public TsFileSketchTool(String filename, String outFile) { + public TsFileSketchToolV2(String filename, String indexFileName, String outFile) { try { this.filename = filename; + this.indexFileName = indexFileName; pw = new PrintWriter(new FileWriter(outFile)); reader = new TsFileSketchToolReader(filename); + indexReader = new TsFileSketchToolReader(indexFileName); StringBuilder str1 = new StringBuilder(); for (int i = 0; i < 21; i++) { str1.append("|"); @@ -93,7 +100,7 @@ public class TsFileSketchTool { printlnBoth(pw, "file length: " + length); // get metadata information - TsFileMetadata tsFileMetaData = reader.readFileMetadata(); + TsFileMetadataV2 tsFileMetaData = reader.readFileMetadataV2(); List<ChunkGroupMetadata> allChunkGroupMetadata = new ArrayList<>(); reader.selfCheck(null, allChunkGroupMetadata, false); @@ -104,27 +111,17 @@ public class TsFileSketchTool { printChunk(allChunkGroupMetadata); // metadata begins - if (tsFileMetaData.getMetadataIndex().getChildren().isEmpty()) { - printlnBoth(pw, String.format("%20s", reader.getFileMetadataPos() - 1) + "|\t[marker] 2"); - } else { - printlnBoth( - pw, String.format("%20s", reader.readFileMetadata().getMetaOffset()) + "|\t[marker] 2"); - } + printlnBoth(pw, String.format("%20s", tsFileMetaData.getMetaOffset()) + "|\t[marker] 2"); + + // System.out.println(reader.getFileMetadataPos()); + // get all timeseries index Map<Long, Pair<Path, TimeseriesMetadata>> timeseriesMetadataMap = - reader.getAllTimeseriesMetadataWithOffset(); + reader.getAllTimeseriesMetadataWithOffset(reader.position(), reader.getFileMetadataPos()); // print timeseries index printTimeseriesIndex(timeseriesMetadataMap); - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); - TreeMap<Long, MetadataIndexNode> metadataIndexNodeMap = new TreeMap<>(); - List<String> treeOutputStringBuffer = new ArrayList<>(); - loadIndexTree(metadataIndexNode, metadataIndexNodeMap, treeOutputStringBuffer, 0); - - // print IndexOfTimerseriesIndex - printIndexOfTimerseriesIndex(metadataIndexNodeMap); - // print TsFile Metadata printTsFileMetadata(tsFileMetaData); @@ -133,6 +130,14 @@ public class TsFileSketchTool { pw, "---------------------------- IndexOfTimerseriesIndex Tree -----------------------------"); // print index tree + MetadataIndexNode metadataIndexNode = readMetadataIndex(); + TreeMap<Long, MetadataIndexNode> metadataIndexNodeMap = new TreeMap<>(); + List<String> treeOutputStringBuffer = new ArrayList<>(); + loadIndexTree(metadataIndexNode, metadataIndexNodeMap, treeOutputStringBuffer, 0); + + // print IndexOfTimerseriesIndex + printIndexOfTimerseriesIndex(metadataIndexNodeMap); + for (String str : treeOutputStringBuffer) { printlnBoth(pw, str); } @@ -142,22 +147,11 @@ public class TsFileSketchTool { pw.close(); } - private void printTsFileMetadata(TsFileMetadata tsFileMetaData) { + private void printTsFileMetadata(TsFileMetadataV2 tsFileMetaData) { try { printlnBoth(pw, String.format("%20s", reader.getFileMetadataPos()) + "|\t[TsFileMetadata]"); printlnBoth( pw, String.format("%20s", "") + "|\t\t[meta offset] " + tsFileMetaData.getMetaOffset()); - printlnBoth( - pw, - String.format("%20s", "") - + "|\t\t[num of devices] " - + tsFileMetaData.getMetadataIndex().getChildren().size()); - printlnBoth( - pw, - String.format("%20s", "") - + "|\t\t" - + tsFileMetaData.getMetadataIndex().getChildren().size() - + " key&TsMetadataIndex"); // bloom filter BloomFilter bloomFilter = tsFileMetaData.getBloomFilter(); printlnBoth( @@ -339,7 +333,8 @@ public class TsFileSketchTool { + entry.getValue().left + ", tsDataType:" + entry.getValue().right.getTSDataType()); - for (IChunkMetadata chunkMetadata : reader.getChunkMetadataList(entry.getValue().left)) { + for (IChunkMetadata chunkMetadata : + reader.getChunkMetadataListV3(entry.getValue().left, false)) { printlnBoth( pw, String.format("%20s", "") @@ -394,7 +389,7 @@ public class TsFileSketchTool { endOffset = metadataIndexNode.getChildren().get(i + 1).getOffset(); } MetadataIndexNode subNode = - reader.getMetadataIndexNode(metadataIndexEntry.getOffset(), endOffset); + indexReader.getMetadataIndexNode(metadataIndexEntry.getOffset(), endOffset); metadataIndexNodeMap.put(metadataIndexEntry.getOffset(), subNode); loadIndexTree(subNode, metadataIndexNodeMap, treeOutputStringBuffer, deep + 1); } @@ -406,16 +401,18 @@ public class TsFileSketchTool { pw.println(str); } - private static Pair<String, String> checkArgs(String[] args) { - String filename = "test.tsfile"; - String outFile = "TsFile_sketch_view.txt"; - if (args.length == 1) { - filename = args[0]; - } else if (args.length == 2) { - filename = args[0]; - outFile = args[1]; - } - return new Pair<>(filename, outFile); + private MetadataIndexNode readMetadataIndex() throws IOException { + TsFileInput tsFileInput = FSFactoryProducer.getFileInputFactory().getTsFileInput(indexFileName); + long totalSize = tsFileInput.size(); + ByteBuffer lastNodeSizeBuffer = ByteBuffer.allocate(Integer.BYTES); + tsFileInput.read(lastNodeSizeBuffer, totalSize - Integer.BYTES); + lastNodeSizeBuffer.flip(); + + int lastNodeSize = ReadWriteIOUtils.readInt(lastNodeSizeBuffer); + ByteBuffer lastNode = ByteBuffer.allocate(lastNodeSize); + tsFileInput.read(lastNode, totalSize - lastNodeSize - Integer.BYTES); + lastNode.flip(); + return MetadataIndexNode.deserializeFrom(lastNode); } private class TsFileSketchToolReader extends TsFileSequenceReader { @@ -440,69 +437,53 @@ public class TsFileSketchTool { Map<Long, Pair<Path, TimeseriesMetadata>> timeseriesMetadataMap, boolean needChunkMetadata) throws IOException { - try { - if (type.equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) { - while (buffer.hasRemaining()) { - long pos = startOffset + buffer.position(); - TimeseriesMetadata timeseriesMetadata = - TimeseriesMetadata.deserializeFrom(buffer, needChunkMetadata); - timeseriesMetadataMap.put( - pos, - new Pair<>( - new Path(deviceId, timeseriesMetadata.getMeasurementId()), timeseriesMetadata)); - } - } else { - // deviceId should be determined by LEAF_DEVICE node - if (type.equals(MetadataIndexNodeType.LEAF_DEVICE)) { - deviceId = metadataIndex.getName(); - } - MetadataIndexNode metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer); - int metadataIndexListSize = metadataIndexNode.getChildren().size(); - for (int i = 0; i < metadataIndexListSize; i++) { - long endOffset = metadataIndexNode.getEndOffset(); - if (i != metadataIndexListSize - 1) { - endOffset = metadataIndexNode.getChildren().get(i + 1).getOffset(); - } - ByteBuffer nextBuffer = - readData(metadataIndexNode.getChildren().get(i).getOffset(), endOffset); - generateMetadataIndexWithOffset( - metadataIndexNode.getChildren().get(i).getOffset(), - metadataIndexNode.getChildren().get(i), - nextBuffer, - deviceId, - metadataIndexNode.getNodeType(), - timeseriesMetadataMap, - needChunkMetadata); + if (type.equals(MetadataIndexNodeType.LEAF_MEASUREMENT)) { + while (buffer.hasRemaining()) { + long pos = startOffset + buffer.position(); + TimeseriesMetadata timeseriesMetadata = + TimeseriesMetadata.deserializeFrom(buffer, needChunkMetadata); + timeseriesMetadataMap.put( + pos, + new Pair<>( + new Path(deviceId, timeseriesMetadata.getMeasurementId()), timeseriesMetadata)); + } + } else { + // deviceId should be determined by LEAF_DEVICE node + if (type.equals(MetadataIndexNodeType.LEAF_DEVICE)) { + deviceId = metadataIndex.getName(); + } + MetadataIndexNode metadataIndexNode = MetadataIndexNode.deserializeFrom(buffer); + int metadataIndexListSize = metadataIndexNode.getChildren().size(); + for (int i = 0; i < metadataIndexListSize; i++) { + long endOffset = metadataIndexNode.getEndOffset(); + if (i != metadataIndexListSize - 1) { + endOffset = metadataIndexNode.getChildren().get(i + 1).getOffset(); } + ByteBuffer nextBuffer = + readData(metadataIndexNode.getChildren().get(i).getOffset(), endOffset); + generateMetadataIndexWithOffset( + metadataIndexNode.getChildren().get(i).getOffset(), + metadataIndexNode.getChildren().get(i), + nextBuffer, + deviceId, + metadataIndexNode.getNodeType(), + timeseriesMetadataMap, + needChunkMetadata); } - } catch (BufferOverflowException e) { - throw e; } } - public Map<Long, Pair<Path, TimeseriesMetadata>> getAllTimeseriesMetadataWithOffset() - throws IOException { - if (tsFileMetaData == null) { - readFileMetadata(); - } - MetadataIndexNode metadataIndexNode = tsFileMetaData.getMetadataIndex(); + public Map<Long, Pair<Path, TimeseriesMetadata>> getAllTimeseriesMetadataWithOffset( + long startOffset, long endOffset) throws IOException { Map<Long, Pair<Path, TimeseriesMetadata>> timeseriesMetadataMap = new TreeMap<>(); - List<MetadataIndexEntry> metadataIndexEntryList = metadataIndexNode.getChildren(); - for (int i = 0; i < metadataIndexEntryList.size(); i++) { - MetadataIndexEntry metadataIndexEntry = metadataIndexEntryList.get(i); - long endOffset = tsFileMetaData.getMetadataIndex().getEndOffset(); - if (i != metadataIndexEntryList.size() - 1) { - endOffset = metadataIndexEntryList.get(i + 1).getOffset(); - } - ByteBuffer buffer = readData(metadataIndexEntry.getOffset(), endOffset); - generateMetadataIndexWithOffset( - metadataIndexEntry.getOffset(), - metadataIndexEntry, - buffer, - null, - metadataIndexNode.getNodeType(), - timeseriesMetadataMap, - false); + + ByteBuffer buffer = readData(startOffset, endOffset); + while (buffer.hasRemaining()) { + int bufferPos = buffer.position(); + TimeseriesMetadata timeseriesMetaData = TimeseriesMetadata.deserializeFrom(buffer, false); + timeseriesMetadataMap.put( + reader.position() + bufferPos, + new Pair<>(new Path("d1", timeseriesMetaData.getMeasurementId()), timeseriesMetaData)); } return timeseriesMetadataMap; } diff --git a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileWrite.java b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileWrite.java new file mode 100644 index 0000000..aa087b4 --- /dev/null +++ b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/test1835/TsFileWrite.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.tsfile.test1835; + +import org.apache.iotdb.tsfile.Constant; +import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; +import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer; +import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.write.TsFileWriter; +import org.apache.iotdb.tsfile.write.record.TSRecord; +import org.apache.iotdb.tsfile.write.record.datapoint.DataPoint; +import org.apache.iotdb.tsfile.write.record.datapoint.LongDataPoint; +import org.apache.iotdb.tsfile.write.schema.UnaryMeasurementSchema; + +import org.apache.commons.cli.BasicParser; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; + +import java.io.File; +import java.util.Random; + +/** + * An example of writing data with TSRecord to TsFile It uses the interface: public void + * addMeasurement(MeasurementSchema measurementSchema) throws WriteProcessException + */ +public class TsFileWrite { + public static int deviceNum = 1; + public static int sensorNum = 1; + public static int fileNum = 1; + + public static void main(String[] args) { + Options opts = new Options(); + // Option chunkNumOption = + // OptionBuilder.withArgName("args").withLongOpt("chunkNum").hasArg().create("c"); + // opts.addOption(chunkNumOption); + + BasicParser parser = new BasicParser(); + CommandLine cl; + try { + cl = parser.parse(opts, args); + // chunkNum = Integer.parseInt(cl.getOptionValue("c")); + } catch (Exception e) { + e.printStackTrace(); + } + + for (int fileIndex = 0; fileIndex < fileNum; fileIndex++) { + try { + String path = + "/Users/samperson1997/git/iotdb/data/data/sequence/root.sg/1/" + + deviceNum + + "/test" + + 0 + + ".tsfile"; + File f = FSFactoryProducer.getFSFactory().getFile(path); + if (f.exists()) { + f.delete(); + } + + try { + TsFileWriter tsFileWriter = new TsFileWriter(f); + // 1000 timeseries + for (int i = 1; i <= 1000; i++) { + tsFileWriter.registerTimeseries( + new Path(Constant.DEVICE_PREFIX, Constant.SENSOR_ + i), + new UnaryMeasurementSchema(Constant.SENSOR_ + i, TSDataType.INT64, TSEncoding.RLE)); + } + // construct TSRecord + for (int i = 1; i <= 100; i++) { + TSRecord tsRecord = new TSRecord(i, Constant.DEVICE_PREFIX); + for (int t = 1; t <= 1000; t++) { + DataPoint dPoint1 = new LongDataPoint(Constant.SENSOR_ + t, new Random().nextLong()); + tsRecord.addTuple(dPoint1); + } + // write TSRecord + tsFileWriter.write(tsRecord); + if (i % 100 == 0) { + tsFileWriter.flushAllChunkGroups(); + } + } + tsFileWriter.close(); + } catch (Throwable e) { + e.printStackTrace(); + System.out.println(e.getMessage()); + } + + } catch (Throwable e) { + e.printStackTrace(); + System.out.println(e.getMessage()); + } + } + } +} diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java index de20f40..f8926d4 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java @@ -44,11 +44,14 @@ public class MetadataIndexConstructor { * Construct metadata index tree * * @param deviceTimeseriesMetadataMap device => TimeseriesMetadata list - * @param out tsfile output + * @param tsFileOutput tsfile output + * @param metadataIndexOutput metadataIndex output */ @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning public static MetadataIndexNode constructMetadataIndex( - Map<String, List<TimeseriesMetadata>> deviceTimeseriesMetadataMap, TsFileOutput out) + Map<String, List<TimeseriesMetadata>> deviceTimeseriesMetadataMap, + TsFileOutput tsFileOutput, + TsFileOutput metadataIndexOutput) throws IOException { Map<String, MetadataIndexNode> deviceMetadataIndexMap = new TreeMap<>(); @@ -68,21 +71,25 @@ public class MetadataIndexConstructor { if (serializedTimeseriesMetadataNum == 0 || serializedTimeseriesMetadataNum >= config.getMaxDegreeOfIndexNode()) { if (currentIndexNode.isFull()) { - addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + addCurrentIndexNodeToQueue( + currentIndexNode, measurementMetadataIndexQueue, tsFileOutput); currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); } currentIndexNode.addEntry( - new MetadataIndexEntry(timeseriesMetadata.getMeasurementId(), out.getPosition())); + new MetadataIndexEntry( + timeseriesMetadata.getMeasurementId(), tsFileOutput.getPosition())); serializedTimeseriesMetadataNum = 0; } - timeseriesMetadata.serializeTo(out.wrapAsStream()); + timeseriesMetadata.serializeTo(tsFileOutput.wrapAsStream()); serializedTimeseriesMetadataNum++; } - addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, tsFileOutput); deviceMetadataIndexMap.put( entry.getKey(), generateRootNode( - measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); + measurementMetadataIndexQueue, + metadataIndexOutput, + MetadataIndexNodeType.INTERNAL_MEASUREMENT)); } // if not exceed the max child nodes num, ignore the device index and directly point to the @@ -91,10 +98,11 @@ public class MetadataIndexConstructor { MetadataIndexNode metadataIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); for (Map.Entry<String, MetadataIndexNode> entry : deviceMetadataIndexMap.entrySet()) { - metadataIndexNode.addEntry(new MetadataIndexEntry(entry.getKey(), out.getPosition())); - entry.getValue().serializeTo(out.wrapAsStream()); + metadataIndexNode.addEntry( + new MetadataIndexEntry(entry.getKey(), metadataIndexOutput.getPosition())); + entry.getValue().serializeTo(metadataIndexOutput.wrapAsStream()); } - metadataIndexNode.setEndOffset(out.getPosition()); + metadataIndexNode.setEndOffset(metadataIndexOutput.getPosition()); return metadataIndexNode; } @@ -105,16 +113,18 @@ public class MetadataIndexConstructor { for (Map.Entry<String, MetadataIndexNode> entry : deviceMetadataIndexMap.entrySet()) { // when constructing from internal node, each node is related to an entry if (currentIndexNode.isFull()) { - addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, out); + addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, metadataIndexOutput); currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); } - currentIndexNode.addEntry(new MetadataIndexEntry(entry.getKey(), out.getPosition())); - entry.getValue().serializeTo(out.wrapAsStream()); + currentIndexNode.addEntry( + new MetadataIndexEntry(entry.getKey(), metadataIndexOutput.getPosition())); + entry.getValue().serializeTo(metadataIndexOutput.wrapAsStream()); } - addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, out); + addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, metadataIndexOutput); MetadataIndexNode deviceMetadataIndexNode = - generateRootNode(deviceMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_DEVICE); - deviceMetadataIndexNode.setEndOffset(out.getPosition()); + generateRootNode( + deviceMetadataIndexQueue, metadataIndexOutput, MetadataIndexNodeType.INTERNAL_DEVICE); + deviceMetadataIndexNode.setEndOffset(metadataIndexOutput.getPosition()); return deviceMetadataIndexNode; } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/TsFileMetadataV2.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/TsFileMetadataV2.java new file mode 100644 index 0000000..64246b7 --- /dev/null +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/TsFileMetadataV2.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.tsfile.file.metadata; + +import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor; +import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.utils.BloomFilter; +import org.apache.iotdb.tsfile.utils.ReadWriteForEncodingUtils; +import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.Set; + +/** TSFileMetaData collects all metadata info and saves in its data structure. */ +public class TsFileMetadataV2 { + + // bloom filter + private BloomFilter bloomFilter; + + // offset of MetaMarker.SEPARATOR + private long metaOffset; + + /** + * deserialize data from the buffer. + * + * @param buffer -buffer use to deserialize + * @return -a instance of TsFileMetaData + */ + public static TsFileMetadataV2 deserializeFrom(ByteBuffer buffer) { + TsFileMetadataV2 fileMetaData = new TsFileMetadataV2(); + + // metaOffset + long metaOffset = ReadWriteIOUtils.readLong(buffer); + fileMetaData.setMetaOffset(metaOffset); + + // read bloom filter + if (buffer.hasRemaining()) { + byte[] bytes = ReadWriteIOUtils.readByteBufferWithSelfDescriptionLength(buffer); + int filterSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + int hashFunctionSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + fileMetaData.bloomFilter = BloomFilter.buildBloomFilter(bytes, filterSize, hashFunctionSize); + } + + return fileMetaData; + } + + public BloomFilter getBloomFilter() { + return bloomFilter; + } + + public void setBloomFilter(BloomFilter bloomFilter) { + this.bloomFilter = bloomFilter; + } + + /** + * use the given outputStream to serialize. + * + * @param outputStream -output stream to determine byte length + * @return -byte length + */ + public int serializeTo(OutputStream outputStream) throws IOException { + return ReadWriteIOUtils.write(metaOffset, outputStream); + } + + /** + * use the given outputStream to serialize bloom filter. + * + * @param outputStream -output stream to determine byte length + * @return -byte length + */ + public int serializeBloomFilter(OutputStream outputStream, Set<Path> paths) throws IOException { + int byteLen = 0; + BloomFilter filter = buildBloomFilter(paths); + + byte[] bytes = filter.serialize(); + byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(bytes.length, outputStream); + outputStream.write(bytes); + byteLen += bytes.length; + byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(filter.getSize(), outputStream); + byteLen += + ReadWriteForEncodingUtils.writeUnsignedVarInt(filter.getHashFunctionSize(), outputStream); + return byteLen; + } + + /** + * build bloom filter + * + * @return bloom filter + */ + private BloomFilter buildBloomFilter(Set<Path> paths) { + BloomFilter filter = + BloomFilter.getEmptyBloomFilter( + TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), paths.size()); + for (Path path : paths) { + filter.add(path.toString()); + } + return filter; + } + + public long getMetaOffset() { + return metaOffset; + } + + public void setMetaOffset(long metaOffset) { + this.metaOffset = metaOffset; + } +} diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/LocalFSFactory.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/LocalFSFactory.java index 365ded1..8ea24c5 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/LocalFSFactory.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/fileSystem/fsFactory/LocalFSFactory.java @@ -19,7 +19,7 @@ package org.apache.iotdb.tsfile.fileSystem.fsFactory; -import org.apache.commons.io.FileUtils; +// import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -112,15 +112,15 @@ public class LocalFSFactory implements FSFactory { @Override public void moveFile(File srcFile, File destFile) { - try { - FileUtils.moveFile(srcFile, destFile); - } catch (IOException e) { - logger.error( - "Failed to move file from {} to {}. ", - srcFile.getAbsolutePath(), - destFile.getAbsolutePath(), - e); - } + // try { + // FileUtils.moveFile(srcFile, destFile); + // } catch (IOException e) { + // logger.error( + // "Failed to move file from {} to {}. ", + // srcFile.getAbsolutePath(), + // destFile.getAbsolutePath(), + // e); + // } } @Override diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java index 9a24644..565d677 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java @@ -35,6 +35,7 @@ import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadataV2; import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata; +import org.apache.iotdb.tsfile.file.metadata.TsFileMetadataV2; import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; import org.apache.iotdb.tsfile.file.metadata.enums.MetadataIndexNodeType; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; @@ -270,6 +271,15 @@ public class TsFileSequenceReader implements AutoCloseable { return tsFileMetaData; } + public TsFileMetadataV2 readFileMetadataV2() throws IOException { + try { + return TsFileMetadataV2.deserializeFrom(readData(fileMetadataPos, fileMetadataSize)); + } catch (BufferOverflowException e) { + logger.error("Something error happened while reading file metadata of file {}", file); + throw e; + } + } + /** * this function does not modify the position of the file reader. * @@ -415,6 +425,27 @@ public class TsFileSequenceReader implements AutoCloseable { return searchResult >= 0 ? timeseriesMetadataList.get(searchResult) : null; } + public TimeseriesMetadata readTimeseriesMetadataV3(Path path, boolean ignoreNotExists) + throws IOException { + readFileMetadataV2(); + + List<TimeseriesMetadata> timeseriesMetadataList = new ArrayList<>(); + ByteBuffer buffer = readData(position(), fileMetadataPos); + while (buffer.hasRemaining()) { + try { + timeseriesMetadataList.add(TimeseriesMetadata.deserializeFrom(buffer, true)); + } catch (BufferOverflowException e) { + logger.error( + "Something error happened while deserializing TimeseriesMetadata of file {}", file); + throw e; + } + } + // return null if path does not exist in the TsFile + int searchResult = + binarySearchInTimeseriesMetadataList(timeseriesMetadataList, path.getMeasurement()); + return searchResult >= 0 ? timeseriesMetadataList.get(searchResult) : null; + } + /** * Find the leaf node that contains this vector, return all the needed subSensor and time column * @@ -1352,6 +1383,15 @@ public class TsFileSequenceReader implements AutoCloseable { return chunkMetadataList; } + public List<ChunkMetadata> getChunkMetadataListV3(Path path, boolean ignoreNotExists) + throws IOException { + TimeseriesMetadata timeseriesMetaData = readTimeseriesMetadataV3(path, ignoreNotExists); + + List<ChunkMetadata> chunkMetadataList = readChunkMetaDataList(timeseriesMetaData); + chunkMetadataList.sort(Comparator.comparingLong(IChunkMetadata::getStartTime)); + return chunkMetadataList; + } + public List<ChunkMetadata> getChunkMetadataList(Path path) throws IOException { return getChunkMetadataList(path, false); } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/TsFileWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/TsFileWriter.java index 3ae214e..4dd528d 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/TsFileWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/TsFileWriter.java @@ -358,7 +358,7 @@ public class TsFileWriter implements AutoCloseable { // public void closeV2() throws IOException { LOG.info("start close file"); flushAllChunkGroups(); - fileWriter.endFileV2(); + fileWriter.endFileV3(); } /** diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 17036ff..3c0185c 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -32,6 +32,7 @@ import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadataV2; import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata; +import org.apache.iotdb.tsfile.file.metadata.TsFileMetadataV2; import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; @@ -47,6 +48,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -293,7 +295,8 @@ public class TsFileIOWriter { } } - MetadataIndexNode metadataIndex = flushMetadataIndex(chunkMetadataListMap, vectorToPathsMap); + MetadataIndexNode metadataIndex = + flushMetadataIndex(chunkMetadataListMap, vectorToPathsMap, out); TsFileMetadata tsFileMetaData = new TsFileMetadata(); tsFileMetaData.setMetadataIndex(metadataIndex); tsFileMetaData.setMetaOffset(metaOffset); @@ -409,6 +412,92 @@ public class TsFileIOWriter { canWrite = false; } + public void endFileV3() throws IOException { + long metaOffset = out.getPosition(); + + // serialize the SEPARATOR of MetaData + ReadWriteIOUtils.write(MetaMarker.SEPARATOR, out.wrapAsStream()); + + // group ChunkMetadata by series + // only contains ordinary path and time column of vector series + Map<Path, List<IChunkMetadata>> chunkMetadataListMap = new TreeMap<>(); + + // time column -> ChunkMetadataList TreeMap of value columns in vector + Map<Path, Map<Path, List<IChunkMetadata>>> vectorToPathsMap = new HashMap<>(); + + for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { + List<ChunkMetadata> chunkMetadatas = chunkGroupMetadata.getChunkMetadataList(); + int idx = 0; + while (idx < chunkMetadatas.size()) { + IChunkMetadata chunkMetadata = chunkMetadatas.get(idx); + if (chunkMetadata.getMask() == 0) { + Path series = new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); + chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); + idx++; + } else if (chunkMetadata.isTimeColumn()) { + // time column of a vector series + Path series = new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); + chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); + idx++; + Map<Path, List<IChunkMetadata>> chunkMetadataListMapInVector = + vectorToPathsMap.computeIfAbsent(series, key -> new TreeMap<>()); + + // value columns of a vector series + while (idx < chunkMetadatas.size() && chunkMetadatas.get(idx).isValueColumn()) { + chunkMetadata = chunkMetadatas.get(idx); + Path vectorSeries = + new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); + chunkMetadataListMapInVector + .computeIfAbsent(vectorSeries, k -> new ArrayList<>()) + .add(chunkMetadata); + idx++; + } + } + } + } + + // NOTICE: update here, TsFileMetadataV2 does not have MetadataIndexTree + // ====================== // + TsFileMetadataV2 tsFileMetaData = new TsFileMetadataV2(); + tsFileMetaData.setMetaOffset(metaOffset); + + TsFileOutput metadataIndexOutput = + new LocalTsFileOutput(new FileOutputStream(new File(file.getAbsolutePath() + ".index"))); + MetadataIndexNode metadataIndex = + flushMetadataIndex(chunkMetadataListMap, vectorToPathsMap, metadataIndexOutput); + int lastNodeSize = metadataIndex.serializeTo(metadataIndexOutput.wrapAsStream()); + + // write the size of last MetadataIndexNode + ReadWriteIOUtils.write(lastNodeSize, metadataIndexOutput.wrapAsStream()); + metadataIndexOutput.close(); + // ====================== // + + // write TsFileMetaData + int size = tsFileMetaData.serializeTo(out.wrapAsStream()); + if (logger.isDebugEnabled()) { + logger.debug("finish flushing the footer {}, file pos:{}", tsFileMetaData, out.getPosition()); + } + + // write bloom filter + size += tsFileMetaData.serializeBloomFilter(out.wrapAsStream(), chunkMetadataListMap.keySet()); + if (logger.isDebugEnabled()) { + logger.debug("finish flushing the bloom filter file pos:{}", out.getPosition()); + } + + // write TsFileMetaData size + ReadWriteIOUtils.write(size, out.wrapAsStream()); // write the size of the file metadata. + + // write magic string + out.write(MAGIC_STRING_BYTES); + + // close file + out.close(); + if (resourceLogger.isDebugEnabled() && file != null) { + resourceLogger.debug("{} writer is closed.", file.getName()); + } + canWrite = false; + } + /** * Flush TsFileMetadata, including ChunkMetadataList and TimeseriesMetaData * @@ -419,7 +508,8 @@ public class TsFileIOWriter { */ private MetadataIndexNode flushMetadataIndex( Map<Path, List<IChunkMetadata>> chunkMetadataListMap, - Map<Path, Map<Path, List<IChunkMetadata>>> vectorToPathsMap) + Map<Path, Map<Path, List<IChunkMetadata>>> vectorToPathsMap, + TsFileOutput metadataIndexOutput) throws IOException { // convert ChunkMetadataList to this field @@ -431,7 +521,8 @@ public class TsFileIOWriter { } // construct TsFileMetadata and return - return MetadataIndexConstructor.constructMetadataIndex(deviceTimeseriesMetadataMap, out); + return MetadataIndexConstructor.constructMetadataIndex( + deviceTimeseriesMetadataMap, out, metadataIndexOutput); } private MetadataIndexNode flushMetadataIndexV2( diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/FilePathUtilsTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/FilePathUtilsTest.java index eb0fd69..50bd890 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/FilePathUtilsTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/FilePathUtilsTest.java @@ -18,7 +18,7 @@ */ package org.apache.iotdb.tsfile.utils; -import org.apache.commons.io.FileUtils; +// import org.apache.commons.io.FileUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -51,7 +51,7 @@ public class FilePathUtilsTest { tsFile = new File(fullPath); boolean success = false; try { - FileUtils.forceMkdirParent(tsFile); + // FileUtils.forceMkdirParent(tsFile); success = tsFile.createNewFile(); } catch (IOException e) { Assert.fail(e.getMessage());
