Hi XiangDong,
Sure, I can do that. I’ve already done it and submitted it to Confluence:
https://cwiki.apache.org/confluence/display/IOTDB/TsFile+Format
<https://cwiki.apache.org/confluence/display/IOTDB/TsFile+Format>.
> 在 2019年2月25日,上午8:22,Xiangdong Huang <[email protected]> 写道:
>
> Hi Dongfang,
>
> In this PR, you modified the format of TsFile. Can you update the document
> to describe the new TsFile format? The document to describe TsFile is very
> important.
>
> I suggest you update this document and submit it to the confluence:
> https://cwiki.apache.org/confluence/display/IOTDB
>
> (We have an old version on the previous git repo wiki:
> https://github.com/thulab/iotdb/wiki/%5BTsFile%5D-What-is-new-from-v0.7.0--to-Kill_Thanos
> )
>
> Best,
> -----------------------------------
> Xiangdong Huang
> School of Software, Tsinghua University
>
> 黄向东
> 清华大学 软件学院
>
>
> <[email protected]> 于2019年2月20日周三 下午6:41写道:
>
>> This is an automated email from the ASF dual-hosted git repository.
>>
>> east pushed a commit to branch east_update_chunkgroupmetadata
>> in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git
>>
>>
>> The following commit(s) were added to
>> refs/heads/east_update_chunkgroupmetadata by this push:
>> new f96662f add offset and size for ChunkGroupMetaData
>> f96662f is described below
>>
>> commit f96662fc764f86dfc99f941ec00ea76108924dee
>> Author: mdf369 <[email protected]>
>> AuthorDate: Wed Feb 20 18:41:20 2019 +0800
>>
>> add offset and size for ChunkGroupMetaData
>> ---
>> .../apache/iotdb}/tsfile/TsFileRead.java | 0
>> .../apache/iotdb}/tsfile/TsFileSequenceRead.java | 28
>> ++++++++++++++++------
>> .../apache/iotdb}/tsfile/TsFileWrite.java | 2 ++
>> .../tsfile/file/metadata/ChunkGroupMetaData.java | 20 ++++++++--------
>> .../iotdb/tsfile/write/writer/TsFileIOWriter.java | 4 +---
>> 5 files changed, 34 insertions(+), 20 deletions(-)
>>
>> diff --git
>> a/tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileRead.java
>> b/tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileRead.java
>> similarity index 100%
>> rename from
>> tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileRead.java
>> rename to
>> tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileRead.java
>> diff --git
>> a/tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileSequenceRead.java
>> b/tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java
>> similarity index 84%
>> rename from
>> tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileSequenceRead.java
>> rename to
>> tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java
>> index faecbdd..c973369 100644
>> ---
>> a/tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileSequenceRead.java
>> +++
>> b/tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java
>> @@ -21,6 +21,7 @@ package org.apache.iotdb.tsfile;
>> import java.io.File;
>> import java.io.IOException;
>> import java.nio.ByteBuffer;
>> +import java.util.ArrayList;
>> import java.util.List;
>> import java.util.stream.Collectors;
>> import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
>> @@ -39,11 +40,12 @@ import
>> org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
>> import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
>> import org.apache.iotdb.tsfile.read.common.BatchData;
>> import org.apache.iotdb.tsfile.read.reader.page.PageReader;
>> +import org.apache.iotdb.tsfile.utils.Pair;
>>
>> public class TsFileSequenceRead {
>>
>> - public static void main(String[] args) throws IOException {
>> - TsFileSequenceReader reader = new TsFileSequenceReader("test.tsfile");
>> + public static void main(String[] args) throws Exception {
>> + TsFileSequenceReader reader = new
>> TsFileSequenceReader("/Users/East/projects/IoTDB/incubator-iotdb/tsfile/src/test/resources/test.tsfile");
>> System.out.println("file length: " + new
>> File("test.tsfile").length());
>> System.out.println("file magic head: " + reader.readHeadMagic());
>> System.out.println("file magic tail: " + reader.readTailMagic());
>> @@ -54,12 +56,14 @@ public class TsFileSequenceRead {
>> // first SeriesChunks (headers and data) in one ChunkGroup, then the
>> CHUNK_GROUP_FOOTER
>> // Because we do not know how many chunks a ChunkGroup may have, we
>> should read one byte (the marker) ahead and
>> // judge accordingly.
>> + List<Pair<Long, Long>> offsetList = new ArrayList<>();
>> + long startOffset = reader.position();
>> System.out.println("[Chunk Group]");
>> System.out.println("position: " + reader.position());
>> byte marker;
>> - while ((marker = reader.readMarker()) != MetaMarker.Separator) {
>> + while ((marker = reader.readMarker()) != MetaMarker.SEPARATOR) {
>> switch (marker) {
>> - case MetaMarker.ChunkHeader:
>> + case MetaMarker.CHUNK_HEADER:
>> System.out.println("\t[Chunk]");
>> System.out.println("\tposition: " + reader.position());
>> ChunkHeader header = reader.readChunkHeader();
>> @@ -90,28 +94,38 @@ public class TsFileSequenceRead {
>> }
>> }
>> break;
>> - case MetaMarker.ChunkGroupFooter:
>> + case MetaMarker.CHUNK_GROUP_FOOTER:
>> System.out.println("Chunk Group Footer position: " +
>> reader.position());
>> ChunkGroupFooter chunkGroupFooter =
>> reader.readChunkGroupFooter();
>> System.out.println("device: " + chunkGroupFooter.getDeviceID());
>> + long endOffset = reader.position();
>> + offsetList.add(new Pair<>(startOffset, endOffset));
>> + startOffset = endOffset;
>> break;
>> default:
>> MetaMarker.handleUnexpectedMarker(marker);
>> }
>> }
>> System.out.println("[Metadata]");
>> + int offsetListIndex = 0;
>> List<TsDeviceMetadataIndex> deviceMetadataIndexList =
>> metaData.getDeviceMap().values().stream()
>> .sorted((x, y) -> (int) (x.getOffset() -
>> y.getOffset())).collect(Collectors.toList());
>> for (TsDeviceMetadataIndex index : deviceMetadataIndexList) {
>> TsDeviceMetadata deviceMetadata =
>> reader.readTsDeviceMetaData(index);
>> - List<ChunkGroupMetaData> chunkGroupMetaDataList =
>> deviceMetadata.getChunkGroups();
>> + List<ChunkGroupMetaData> chunkGroupMetaDataList =
>> deviceMetadata.getChunkGroupMetaDataList();
>> for (ChunkGroupMetaData chunkGroupMetaData :
>> chunkGroupMetaDataList) {
>> System.out.println(String
>> .format("\t[Device]File Offset: %d, Device %s, Number of
>> Chunk Groups %d",
>> index.getOffset(), chunkGroupMetaData.getDeviceID(),
>> chunkGroupMetaDataList.size()));
>> +
>> + Pair<Long, Long> pair = offsetList.get(offsetListIndex++);
>> + if (chunkGroupMetaData.getOffsetOfChunkGroupHeader() != pair.left
>> || chunkGroupMetaData.getEndPositionOfChunkGroup() != pair.right) {
>> + throw new Exception("Wrong offset of chunk group meta data!");
>> + }
>> +
>> for (ChunkMetaData chunkMetadata :
>> chunkGroupMetaData.getChunkMetaDataList()) {
>> - System.out.println("\t\tMeasurement:" +
>> chunkMetadata.getMeasurementUID());
>> + System.out.println("\t\tMeasurement:" +
>> chunkMetadata.getMeasurementUid());
>> System.out.println("\t\tFile offset:" +
>> chunkMetadata.getOffsetOfChunkHeader());
>> }
>> }
>> diff --git
>> a/tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileWrite.java
>> b/tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileWrite.java
>> similarity index 97%
>> rename from
>> tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileWrite.java
>> rename to
>> tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileWrite.java
>> index 37b69be..367cbde 100644
>> --- a/tsfile/example/src/main/java/cn/edu/tsinghua/tsfile/TsFileWrite.java
>> +++ b/tsfile/example/src/main/java/org/apache/iotdb/tsfile/TsFileWrite.java
>> @@ -29,6 +29,8 @@ import
>> org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
>> import org.apache.iotdb.tsfile.write.TsFileWriter;
>> import org.apache.iotdb.tsfile.write.record.TSRecord;
>> import org.apache.iotdb.tsfile.write.record.datapoint.DataPoint;
>> +import org.apache.iotdb.tsfile.write.record.datapoint.FloatDataPoint;
>> +import org.apache.iotdb.tsfile.write.record.datapoint.IntDataPoint;
>> import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
>>
>> /**
>> diff --git
>> a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkGroupMetaData.java
>> b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkGroupMetaData.java
>> index 99db769..1884b18 100644
>> ---
>> a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkGroupMetaData.java
>> +++
>> b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkGroupMetaData.java
>> @@ -49,10 +49,10 @@ public class ChunkGroupMetaData {
>> private long offsetOfChunkGroupHeader;
>>
>> /**
>> - * Byte size of the corresponding data in the file Notice: include the
>> chunk group header and marker.
>> + * End Byte position of the whole chunk group in the file Notice:
>> position after the chunk group footer.
>> * For Hadoop and Spark.
>> */
>> - private long sizeOfChunkGroup;
>> + private long endPositionOfChunkGroup;
>>
>> /**
>> * All time series chunks in this chunk group.
>> @@ -94,7 +94,7 @@ public class ChunkGroupMetaData {
>>
>> chunkGroupMetaData.deviceID =
>> ReadWriteIOUtils.readString(inputStream);
>> chunkGroupMetaData.offsetOfChunkGroupHeader =
>> ReadWriteIOUtils.readLong(inputStream);
>> - chunkGroupMetaData.sizeOfChunkGroup =
>> ReadWriteIOUtils.readLong(inputStream);
>> + chunkGroupMetaData.endPositionOfChunkGroup =
>> ReadWriteIOUtils.readLong(inputStream);
>>
>> int size = ReadWriteIOUtils.readInt(inputStream);
>> chunkGroupMetaData.serializedSize =
>> @@ -123,7 +123,7 @@ public class ChunkGroupMetaData {
>>
>> chunkGroupMetaData.deviceID = ReadWriteIOUtils.readString(buffer);
>> chunkGroupMetaData.offsetOfChunkGroupHeader =
>> ReadWriteIOUtils.readLong(buffer);
>> - chunkGroupMetaData.sizeOfChunkGroup =
>> ReadWriteIOUtils.readLong(buffer);
>> + chunkGroupMetaData.endPositionOfChunkGroup =
>> ReadWriteIOUtils.readLong(buffer);
>>
>> int size = ReadWriteIOUtils.readInt(buffer);
>>
>> @@ -182,12 +182,12 @@ public class ChunkGroupMetaData {
>> return offsetOfChunkGroupHeader;
>> }
>>
>> - public long getSizeOfChunkGroup() {
>> - return sizeOfChunkGroup;
>> + public long getEndPositionOfChunkGroup() {
>> + return endPositionOfChunkGroup;
>> }
>>
>> - public void setSizeOfChunkGroup(long sizeOfChunkGroup) {
>> - this.sizeOfChunkGroup = sizeOfChunkGroup;
>> + public void setEndPositionOfChunkGroup(long endPositionOfChunkGroup) {
>> + this.endPositionOfChunkGroup = endPositionOfChunkGroup;
>> }
>>
>> /**
>> @@ -201,7 +201,7 @@ public class ChunkGroupMetaData {
>> int byteLen = 0;
>> byteLen += ReadWriteIOUtils.write(deviceID, outputStream);
>> byteLen += ReadWriteIOUtils.write(offsetOfChunkGroupHeader,
>> outputStream);
>> - byteLen += ReadWriteIOUtils.write(sizeOfChunkGroup, outputStream);
>> + byteLen += ReadWriteIOUtils.write(endPositionOfChunkGroup,
>> outputStream);
>>
>> byteLen += ReadWriteIOUtils.write(chunkMetaDataList.size(),
>> outputStream);
>> for (ChunkMetaData chunkMetaData : chunkMetaDataList) {
>> @@ -223,7 +223,7 @@ public class ChunkGroupMetaData {
>>
>> byteLen += ReadWriteIOUtils.write(deviceID, buffer);
>> byteLen += ReadWriteIOUtils.write(offsetOfChunkGroupHeader, buffer);
>> - byteLen += ReadWriteIOUtils.write(sizeOfChunkGroup, buffer);
>> + byteLen += ReadWriteIOUtils.write(endPositionOfChunkGroup, buffer);
>>
>> byteLen += ReadWriteIOUtils.write(chunkMetaDataList.size(), buffer);
>> for (ChunkMetaData chunkMetaData : chunkMetaDataList) {
>> diff --git
>> a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java
>> b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java
>> index 54de973..f40f1bd 100644
>> ---
>> a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java
>> +++
>> b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java
>> @@ -192,10 +192,8 @@ public class TsFileIOWriter {
>> * @param chunkGroupFooter -use to serialize
>> */
>> public void endChunkGroup(ChunkGroupFooter chunkGroupFooter) throws
>> IOException {
>> - long pos = out.getPosition();
>> chunkGroupFooter.serializeTo(out.wrapAsStream());
>> - long chunkGroupFooterSize = out.getPosition() - pos;
>> -
>> currentChunkGroupMetaData.setSizeOfChunkGroup(chunkGroupFooter.getDataSize()
>> + chunkGroupFooterSize);
>> +
>> currentChunkGroupMetaData.setEndPositionOfChunkGroup(out.getPosition());
>> chunkGroupMetaDataList.add(currentChunkGroupMetaData);
>> LOG.debug("end chunk group:{}", currentChunkGroupMetaData);
>> currentChunkGroupMetaData = null;
>>
>>