This is an automated email from the ASF dual-hosted git repository.

hxd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 5309448  [IOTDB-39]Refactor auto repair for TsFile  reader and TsFile 
writer (#100)
5309448 is described below

commit 530944873082870f06bba38c53de1ec2b16a53bc
Author: Xiangdong Huang <[email protected]>
AuthorDate: Tue Mar 26 08:51:37 2019 +0800

    [IOTDB-39]Refactor auto repair for TsFile  reader and TsFile writer (#100)
    
    * move recovery function from TsFileSequenceReader to its subclass
    
    * add append option in TsFile writer and add a subclass of 
TsFileSequenceReader for supporting auto repair
---
 .../iotdb/tsfile/read/TsFileCheckStatus.java       |  28 +++
 .../iotdb/tsfile/read/TsFileRestorableReader.java  |  70 ++++++
 .../iotdb/tsfile/read/TsFileSequenceReader.java    | 238 +++++++++++++++++----
 .../write/writer/NativeRestorableIOWriter.java     | 193 +++--------------
 .../tsfile/read/TsFileRestorableReaderTest.java    |  59 +++++
 .../tsfile/read/TsFileSequenceReaderTest.java      |  23 --
 .../iotdb/tsfile/utils/TsFileGeneratorForTest.java |  66 +++---
 .../write/writer/NativeRestorableIOWriterTest.java |  41 ++--
 8 files changed, 431 insertions(+), 287 deletions(-)

diff --git 
a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileCheckStatus.java 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileCheckStatus.java
new file mode 100644
index 0000000..3738b42
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileCheckStatus.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.read;
+
+public class TsFileCheckStatus {
+  public static final long COMPLETE_FILE = -1;
+  public static final long ONLY_MAGIC_HEAD = -2;
+  public static final long INCOMPATIBLE_FILE = -3;
+  public static final long FILE_NOT_FOUND = -4;
+
+}
diff --git 
a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileRestorableReader.java 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileRestorableReader.java
new file mode 100644
index 0000000..44b29a6
--- /dev/null
+++ 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileRestorableReader.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.read;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
+import org.apache.iotdb.tsfile.write.TsFileWriter;
+import org.apache.iotdb.tsfile.write.writer.NativeRestorableIOWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TsFileRestorableReader extends TsFileSequenceReader {
+
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(TsFileRestorableReader.class);
+
+  public TsFileRestorableReader(String file) throws IOException {
+    this(file, true);
+  }
+
+  public TsFileRestorableReader(String file, boolean autoRepair)
+      throws IOException {
+    //if autoRepair == true, the file is likely broken, so we cannot read its 
metadata yet;
+    //otherwise, the user either considers the file complete, or accepts an
+    // Exception when reading broken data. Therefore, we set loadMetadata to 
true in that case.
+    super(file, !autoRepair);
+    if (autoRepair) {
+      try {
+        checkAndRepair();
+      } catch (Throwable e) {
+        close();
+        throw e;
+      }
+      loadMetadataSize();
+    }
+  }
+
+  /**
+   * Checks if the file is incomplete, and if so, tries to repair it.
+   */
+  private void checkAndRepair() throws IOException {
+    // Check if file is damaged
+    if (!isComplete()) {
+      // Try to close it
+      LOGGER.info("File {} has no correct tail magic, try to repair...", file);
+      NativeRestorableIOWriter rWriter = new NativeRestorableIOWriter(new 
File(file), false);
+      TsFileWriter writer = new TsFileWriter(rWriter);
+      // This writes the right magic string
+      writer.close();
+    }
+  }
+}
diff --git 
a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java
index 72f268d..7107c8b 100644
--- 
a/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java
+++ 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/read/TsFileSequenceReader.java
@@ -18,16 +18,25 @@
  */
 package org.apache.iotdb.tsfile.read;
 
+import static 
org.apache.iotdb.tsfile.write.writer.TsFileIOWriter.magicStringBytes;
+
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
 import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
 import org.apache.iotdb.tsfile.compress.IUnCompressor;
+import org.apache.iotdb.tsfile.file.MetaMarker;
 import org.apache.iotdb.tsfile.file.footer.ChunkGroupFooter;
 import org.apache.iotdb.tsfile.file.header.ChunkHeader;
 import org.apache.iotdb.tsfile.file.header.PageHeader;
+import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetaData;
 import org.apache.iotdb.tsfile.file.metadata.ChunkMetaData;
 import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadata;
 import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadataIndex;
@@ -38,12 +47,11 @@ import org.apache.iotdb.tsfile.read.common.Chunk;
 import org.apache.iotdb.tsfile.read.reader.DefaultTsFileInput;
 import org.apache.iotdb.tsfile.read.reader.TsFileInput;
 import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
-import org.apache.iotdb.tsfile.write.TsFileWriter;
-import org.apache.iotdb.tsfile.write.writer.NativeRestorableIOWriter;
+import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class TsFileSequenceReader {
+public class TsFileSequenceReader implements AutoCloseable{
 
   private static final Logger LOGGER = 
LoggerFactory.getLogger(TsFileSequenceReader.class);
 
@@ -51,7 +59,7 @@ public class TsFileSequenceReader {
   private long fileMetadataPos;
   private int fileMetadataSize;
   private ByteBuffer markerBuffer = ByteBuffer.allocate(Byte.BYTES);
-  private String file;
+  protected String file;
 
   /**
    * Create a file reader of the given file. The reader will read the tail of 
the file to get the
@@ -62,7 +70,7 @@ public class TsFileSequenceReader {
    * @throws IOException If some I/O error occurs
    */
   public TsFileSequenceReader(String file) throws IOException {
-    this(file, true, false);
+    this(file, true);
   }
 
   /**
@@ -72,41 +80,16 @@ public class TsFileSequenceReader {
    * @param loadMetadataSize -load meta data size
    */
   public TsFileSequenceReader(String file, boolean loadMetadataSize) throws 
IOException {
-    this(file, loadMetadataSize, false);
-  }
-
-  /**
-   * construct function for TsFileSequenceReader.
-   *
-   * @param file -given file name
-   * @param loadMetadataSize -load meta data size
-   * @param autoRepair if true it tries to automatically repair a damaged file 
(by changing it!)
-   *                   before reading
-   */
-  public TsFileSequenceReader(String file, boolean loadMetadataSize, boolean 
autoRepair) throws IOException {
     this.file = file;
     final Path path = Paths.get(file);
     tsFileInput = new DefaultTsFileInput(path);
-    if (autoRepair) {
-      checkAndRepair(file, path);
-    }
-    if (loadMetadataSize) {
-      loadMetadataSize();
-    }
-  }
-
-  /**
-   * Checks if the file is incomplete, and if so, tries to repair it.
-   */
-  private void checkAndRepair(String file, Path path) throws IOException {
-    // Check if file is damaged
-    if (!TSFileConfig.MAGIC_STRING.equals(readTailMagic())) {
-      // Try to close it
-      LOGGER.info("File {} has no correct tail magic, try to repair...", 
path.toAbsolutePath());
-      NativeRestorableIOWriter rWriter = new NativeRestorableIOWriter(new 
File(file));
-      TsFileWriter writer = new TsFileWriter(rWriter);
-      // This writes the right magic string
-      writer.close();
+    try {
+      if (loadMetadataSize) {
+       loadMetadataSize();
+      }
+    } catch (Throwable e) {
+      tsFileInput.close();
+      throw e;
     }
   }
 
@@ -125,7 +108,7 @@ public class TsFileSequenceReader {
     this.fileMetadataSize = fileMetadataSize;
   }
 
-  private void loadMetadataSize() throws IOException {
+  protected void loadMetadataSize() throws IOException {
     ByteBuffer metadataSize = ByteBuffer.allocate(Integer.BYTES);
     tsFileInput.read(metadataSize,
         tsFileInput.size() - TSFileConfig.MAGIC_STRING.length() - 
Integer.BYTES);
@@ -152,12 +135,6 @@ public class TsFileSequenceReader {
   public String readTailMagic() throws IOException {
     long totalSize = tsFileInput.size();
 
-    // CHeck if the file is large enough to contain a tail magic
-    // If the file only contains a header magic, this could also be assumed to 
be the tail magic
-    if (totalSize <= TSFileConfig.MAGIC_STRING.length()) {
-      throw new IOException("This file has no tail magic!");
-    }
-
     ByteBuffer magicStringBytes = 
ByteBuffer.allocate(TSFileConfig.MAGIC_STRING.length());
     tsFileInput.read(magicStringBytes, totalSize - 
TSFileConfig.MAGIC_STRING.length());
     magicStringBytes.flip();
@@ -165,6 +142,16 @@ public class TsFileSequenceReader {
   }
 
   /**
+   * whether the file is a complete TsFile: only if both the head magic and 
tail magic strings exist.
+   * @return
+   * @throws IOException
+   */
+  public boolean isComplete() throws IOException {
+    return tsFileInput.size() >= TSFileConfig.MAGIC_STRING.length() * 2 && 
readTailMagic()
+        .equals(readHeadMagic());
+  }
+
+  /**
    * this function does not modify the position of the file reader.
    */
   public String readHeadMagic() throws IOException {
@@ -197,6 +184,21 @@ public class TsFileSequenceReader {
   }
 
   /**
+   * @return get the position after the last chunk group in the file
+   */
+  public long getPositionOfFirstDeviceMetaIndex() throws IOException {
+    TsFileMetaData metaData = readFileMetadata();
+    Optional<Long> data = 
metaData.getDeviceMap().values().stream().map(TsDeviceMetadataIndex::getOffset)
+        .min(Comparator.comparing(Long::valueOf));
+    if(data.isPresent()) {
+      return data.get();
+    } else {
+      //no real data
+      return TSFileConfig.MAGIC_STRING.length();
+    }
+  }
+
+  /**
    * this function does not modify the position of the file reader.
    */
   public TsDeviceMetadata readTsDeviceMetaData(TsDeviceMetadataIndex index) 
throws IOException {
@@ -427,4 +429,152 @@ public class TsFileSequenceReader {
         .readAsPossible(tsFileInput.wrapAsFileChannel(), target, position, 
length);
   }
 
+  /**
+   * Self Check the file and return the position before where the data is safe.
+   *
+   * @param newSchema @OUT, the measurement schema in the file will be added 
into
+   * this parameter.
+   * @param newMetaData @OUT, cannot be null; the chunk group metadata in the 
file will be added into
+   * this parameter.
+   * @param fastFinish if true and the file is complete, then newSchema and 
newMetaData parameter
+   * will be not modified.
+   * @return the position of the file that is fine. All data after the 
position in the file should
+   * be truncated.
+   */
+  public long selfCheck(Map<String, MeasurementSchema> newSchema,
+      List<ChunkGroupMetaData> newMetaData, boolean fastFinish) throws 
IOException {
+    File checkFile = new File(this.file);
+    long fileSize;
+    if (!checkFile.exists()) {
+      return TsFileCheckStatus.FILE_NOT_FOUND;
+    } else {
+      fileSize = checkFile.length();
+    }
+    ChunkMetaData currentChunk;
+    String measurementID;
+    TSDataType dataType;
+    long fileOffsetOfChunk;
+    long startTimeOfChunk = 0;
+    long endTimeOfChunk = 0;
+    long numOfPoints = 0;
+
+    ChunkGroupMetaData currentChunkGroup;
+    List<ChunkMetaData> chunks = null;
+    String deviceID;
+    long startOffsetOfChunkGroup = 0;
+    long endOffsetOfChunkGroup;
+    long versionOfChunkGroup = 0;
+    boolean haveReadAnUnverifiedGroupFooter = false;
+    boolean newGroup = true;
+
+    if (fileSize < TSFileConfig.MAGIC_STRING.length()) {
+      return TsFileCheckStatus.INCOMPATIBLE_FILE;
+    }
+    String magic = readHeadMagic(true);
+    if (!magic.equals(TSFileConfig.MAGIC_STRING)) {
+      return TsFileCheckStatus.INCOMPATIBLE_FILE;
+    }
+
+    if (fileSize == TSFileConfig.MAGIC_STRING.length()) {
+      return TsFileCheckStatus.ONLY_MAGIC_HEAD;
+    } else if (readTailMagic().equals(magic)) {
+      loadMetadataSize();
+      if (fastFinish) {
+        return TsFileCheckStatus.COMPLETE_FILE;
+      }
+    }
+
+    // not a complete file, we will recover it...
+    long truncatedPosition = magicStringBytes.length;
+    boolean goon = true;
+    byte marker;
+    try {
+      while (goon && (marker = this.readMarker()) != MetaMarker.SEPARATOR) {
+        switch (marker) {
+          case MetaMarker.CHUNK_HEADER:
+            //this is a chunk.
+            if (haveReadAnUnverifiedGroupFooter) {
+              //now we are sure that the last ChunkGroupFooter is complete.
+              haveReadAnUnverifiedGroupFooter = false;
+              truncatedPosition = this.position() - 1;
+              newGroup = true;
+            }
+            if (newGroup) {
+              chunks = new ArrayList<>();
+              startOffsetOfChunkGroup = this.position() - 1;
+              newGroup = false;
+            }
+            //if there is something wrong with a chunk, we will drop this part 
of data
+            // (the whole ChunkGroup)
+            ChunkHeader header = this.readChunkHeader();
+            measurementID = header.getMeasurementID();
+            if (newSchema != null) {
+              newSchema.putIfAbsent(measurementID,
+                  new MeasurementSchema(measurementID, header.getDataType(),
+                      header.getEncodingType(), header.getCompressionType()));
+            }
+            dataType = header.getDataType();
+            fileOffsetOfChunk = this.position() - 1;
+            if (header.getNumOfPages() > 0) {
+              PageHeader pageHeader = 
this.readPageHeader(header.getDataType());
+              numOfPoints += pageHeader.getNumOfValues();
+              startTimeOfChunk = pageHeader.getMinTimestamp();
+              endTimeOfChunk = pageHeader.getMaxTimestamp();
+              this.skipPageData(pageHeader);
+            }
+            for (int j = 1; j < header.getNumOfPages() - 1; j++) {
+              //a new Page
+              PageHeader pageHeader = 
this.readPageHeader(header.getDataType());
+              this.skipPageData(pageHeader);
+            }
+            if (header.getNumOfPages() > 1) {
+              PageHeader pageHeader = 
this.readPageHeader(header.getDataType());
+              endTimeOfChunk = pageHeader.getMaxTimestamp();
+              this.skipPageData(pageHeader);
+            }
+            currentChunk = new ChunkMetaData(measurementID, dataType, 
fileOffsetOfChunk,
+                startTimeOfChunk, endTimeOfChunk);
+            currentChunk.setNumOfPoints(numOfPoints);
+            chunks.add(currentChunk);
+            numOfPoints = 0;
+            break;
+          case MetaMarker.CHUNK_GROUP_FOOTER:
+            //this is a chunk group
+            //if there is something wrong with the chunkGroup Footer, we will 
drop this part of data
+            //because we can not guarantee the correction of the deviceId.
+            ChunkGroupFooter chunkGroupFooter = this.readChunkGroupFooter();
+            deviceID = chunkGroupFooter.getDeviceID();
+            endOffsetOfChunkGroup = this.position();
+            currentChunkGroup = new ChunkGroupMetaData(deviceID, chunks, 
startOffsetOfChunkGroup);
+            currentChunkGroup.setEndOffsetOfChunkGroup(endOffsetOfChunkGroup);
+            currentChunkGroup.setVersion(versionOfChunkGroup++);
+            newMetaData.add(currentChunkGroup);
+            // though we have read the current ChunkMetaData from Disk, it may 
be incomplete.
+            // because if the file only loses one byte, the 
ChunkMetaData.deserialize() returns ok,
+            // while the last field of the ChunkMetaData is incorrect.
+            // So, only reading the next MASK, can make sure that this 
ChunkMetaData is complete.
+            haveReadAnUnverifiedGroupFooter = true;
+            break;
+
+          default:
+            // it is impossible that we read an incorrect data.
+            MetaMarker.handleUnexpectedMarker(marker);
+            goon = false;
+        }
+      }
+      //now we read the tail of the file, so we are sure that the last 
ChunkGroupFooter is complete.
+      truncatedPosition = this.position() - 1;
+    } catch (IOException e2) {
+      //if it is the end of the file, and we read an unverifiedGroupFooter, we 
must remove this ChunkGroup
+      if (haveReadAnUnverifiedGroupFooter && !newMetaData.isEmpty()) {
+        newMetaData.remove(newMetaData.size() - 1);
+      }
+    } finally {
+      //something wrong or all data is complete. We will discard current 
FileMetadata
+      // so that we can continue to write data into this tsfile.
+      return truncatedPosition;
+    }
+  }
+
+
 }
diff --git 
a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriter.java
 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriter.java
index deebd25..2496d42 100644
--- 
a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriter.java
+++ 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriter.java
@@ -21,17 +21,10 @@ package org.apache.iotdb.tsfile.write.writer;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
-import org.apache.iotdb.tsfile.file.MetaMarker;
-import org.apache.iotdb.tsfile.file.footer.ChunkGroupFooter;
-import org.apache.iotdb.tsfile.file.header.ChunkHeader;
-import org.apache.iotdb.tsfile.file.header.PageHeader;
-import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetaData;
-import org.apache.iotdb.tsfile.file.metadata.ChunkMetaData;
-import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
+import org.apache.iotdb.tsfile.read.TsFileCheckStatus;
 import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
 import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
 import org.slf4j.Logger;
@@ -53,173 +46,43 @@ public class NativeRestorableIOWriter extends 
TsFileIOWriter {
   }
 
   public NativeRestorableIOWriter(File file) throws IOException {
-    this(file, true);
+    this(file, false);
   }
 
   /**
    * @param file a given tsfile path you want to (continue to) write
-   * @param autoRepair whether auto repair the tsfile (if it is broken)
+   * @param append if true, then the file can support appending data even 
though the file is complete (i.e., tail magic string exists)
    * @throws IOException if write failed, or the file is broken but 
autoRepair==false.
    */
-  public NativeRestorableIOWriter(File file, boolean autoRepair) throws 
IOException {
+  public NativeRestorableIOWriter(File file, boolean append) throws 
IOException {
     super();
-    long fileSize;
-    if (!file.exists()) {
-      this.out = new DefaultTsFileOutput(file, true);
-      startFile();
+    this.out = new DefaultTsFileOutput(file, true);
+    if (file.length() == 0) {
+      //this is a new file
       return;
-    } else {
-      fileSize = file.length();
-      this.out = new DefaultTsFileOutput(file, true);
     }
-
-    //we need to read data to recover TsFileIOWriter.chunkGroupMetaDataList
-    //and remove broken data if exists.
-
-    ChunkMetaData currentChunk;
-    String measurementID;
-    TSDataType dataType;
-    long fileOffsetOfChunk;
-    long startTimeOfChunk = 0;
-    long endTimeOfChunk = 0;
-    long numOfPoints = 0;
-
-    ChunkGroupMetaData currentChunkGroup;
-    List<ChunkMetaData> chunks = null;
-    String deviceID;
-    long startOffsetOfChunkGroup = 0;
-    long endOffsetOfChunkGroup;
-    long versionOfChunkGroup = 0;
-    boolean haveReadAnUnverifiedGroupFooter = false;
-    boolean newGroup = true;
-
-    TsFileSequenceReader reader = new 
TsFileSequenceReader(file.getAbsolutePath(), false);
-    if (fileSize < magicStringBytes.length) {
-        reader.close();
-        out.close();
-        throw new IOException(String
-            .format("%s is not using TsFile format, and will be ignored...", 
file.getAbsolutePath()));
-    }
-    String magic = reader.readHeadMagic(true);
-    if (!magic.equals(new String(magicStringBytes))) {
-      reader.close();
-      out.close();
-      throw new IOException(String
-          .format("%s is not using TsFile format, and will be ignored...", 
file.getAbsolutePath()));
-    }
-
-    if (fileSize == magicStringBytes.length) {
-      if (!autoRepair) {
-        reader.close();
-        out.close();
-        throw new IOException(String
-            .format("%s only has header, but does not allowed to be 
repaired...", file.getAbsolutePath()));
-      }
-    } else if (reader.readTailMagic().equals(magic)) {
-      LOGGER.debug("{} is an complete TsFile.", file.getAbsolutePath());
-      canWrite = false;
-      reader.close();
-      out.close();
-      return;
-    }
-
-    // not a complete file, we will recover it...
-    truncatedPosition = magicStringBytes.length;
-    boolean goon = true;
-    byte marker;
-    try {
-      while (goon && (marker = reader.readMarker()) != MetaMarker.SEPARATOR) {
-        switch (marker) {
-          case MetaMarker.CHUNK_HEADER:
-            //this is a chunk.
-            if (haveReadAnUnverifiedGroupFooter) {
-              //now we are sure that the last ChunkGroupFooter is complete.
-              haveReadAnUnverifiedGroupFooter = false;
-              truncatedPosition = reader.position() - 1;
-              newGroup = true;
-            }
-            if (newGroup) {
-              chunks = new ArrayList<>();
-              startOffsetOfChunkGroup = reader.position() - 1;
-              newGroup = false;
-            }
-            //if there is something wrong with a chunk, we will drop this part 
of data
-            // (the whole ChunkGroup)
-            ChunkHeader header = reader.readChunkHeader();
-            measurementID = header.getMeasurementID();
-            knownSchemas.putIfAbsent(measurementID,
-                new MeasurementSchema(measurementID, header.getDataType(),
-                    header.getEncodingType(), header.getCompressionType()));
-            dataType = header.getDataType();
-            fileOffsetOfChunk = reader.position() - 1;
-            if (header.getNumOfPages() > 0) {
-              PageHeader pageHeader = 
reader.readPageHeader(header.getDataType());
-              numOfPoints += pageHeader.getNumOfValues();
-              startTimeOfChunk = pageHeader.getMinTimestamp();
-              endTimeOfChunk = pageHeader.getMaxTimestamp();
-              reader.skipPageData(pageHeader);
-            }
-            for (int j = 1; j < header.getNumOfPages() - 1; j++) {
-              //a new Page
-              PageHeader pageHeader = 
reader.readPageHeader(header.getDataType());
-              reader.skipPageData(pageHeader);
-            }
-            if (header.getNumOfPages() > 1) {
-              PageHeader pageHeader = 
reader.readPageHeader(header.getDataType());
-              endTimeOfChunk = pageHeader.getMaxTimestamp();
-              reader.skipPageData(pageHeader);
-            }
-            currentChunk = new ChunkMetaData(measurementID, dataType, 
fileOffsetOfChunk,
-                startTimeOfChunk, endTimeOfChunk);
-            currentChunk.setNumOfPoints(numOfPoints);
-            chunks.add(currentChunk);
-            numOfPoints = 0;
-            break;
-          case MetaMarker.CHUNK_GROUP_FOOTER:
-            //this is a chunk group
-            //if there is something wrong with the chunkGroup Footer, we will 
drop this part of data
-            //because we can not guarantee the correction of the deviceId.
-            ChunkGroupFooter chunkGroupFooter = reader.readChunkGroupFooter();
-            deviceID = chunkGroupFooter.getDeviceID();
-            endOffsetOfChunkGroup = reader.position();
-            currentChunkGroup = new ChunkGroupMetaData(deviceID, chunks, 
startOffsetOfChunkGroup);
-            currentChunkGroup.setEndOffsetOfChunkGroup(endOffsetOfChunkGroup);
-            currentChunkGroup.setVersion(versionOfChunkGroup++);
-            chunkGroupMetaDataList.add(currentChunkGroup);
-            // though we have read the current ChunkMetaData from Disk, it may 
be incomplete.
-            // because if the file only loses one byte, the 
ChunkMetaData.deserialize() returns ok,
-            // while the last filed of the ChunkMetaData is incorrect.
-            // So, only reading the next MASK, can make sure that this 
ChunkMetaData is complete.
-            haveReadAnUnverifiedGroupFooter = true;
-            break;
-
-          default:
-            // it is impossible that we read an incorrect data.
-            MetaMarker.handleUnexpectedMarker(marker);
-            goon = false;
+    if (file.exists()) {
+      try (TsFileSequenceReader reader = new 
TsFileSequenceReader(file.getAbsolutePath(), false)) {
+        if (reader.isComplete() && !append) {
+          canWrite = false;
+          out.close();
+          return;
+        }
+        truncatedPosition = reader.selfCheck(knownSchemas, 
chunkGroupMetaDataList, !append);
+        if (truncatedPosition == TsFileCheckStatus.COMPLETE_FILE && !append) {
+            this.canWrite = false;
+            out.close();
+        } else if (truncatedPosition == TsFileCheckStatus.INCOMPATIBLE_FILE) {
+          out.close();
+          throw new IOException(
+              String.format("%s is not in TsFile format.", 
file.getAbsolutePath()));
+        } else if (truncatedPosition == TsFileCheckStatus.ONLY_MAGIC_HEAD) {
+          out.truncate(TSFileConfig.MAGIC_STRING.length());
+        } else {
+          //remove broken data
+          out.truncate(truncatedPosition);
         }
       }
-      //now we read the tail of the file, so we are sure that the last 
ChunkGroupFooter is complete.
-      truncatedPosition = reader.position() - 1;
-    } catch (IOException e2) {
-      //if it is the end of the file, and we read an unverifiedGroupFooter, we 
must remove this ChunkGroup
-      if (haveReadAnUnverifiedGroupFooter && 
!chunkGroupMetaDataList.isEmpty()) {
-        chunkGroupMetaDataList.remove(chunkGroupMetaDataList.size() - 1);
-      }
-    } finally {
-      //something wrong or all data is complete. We will discard current 
FileMetadata
-      // so that we can continue to write data into this tsfile.
-      reader.close();
-      if (autoRepair) {
-        LOGGER.info("File {} has {} bytes, and will be truncated from {}.",
-            file.getAbsolutePath(), file.length(), truncatedPosition);
-        out.truncate(truncatedPosition);
-      }
-    }
-    if (!autoRepair) {
-      out.close();
-      throw new IOException(String
-          .format("%s is incomplete but does not allowed to be repaired...", 
file.getAbsolutePath()));
     }
   }
 
diff --git 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/read/TsFileRestorableReaderTest.java
 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/read/TsFileRestorableReaderTest.java
new file mode 100644
index 0000000..14b6ca3
--- /dev/null
+++ 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/read/TsFileRestorableReaderTest.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.read;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
+import org.apache.iotdb.tsfile.utils.TsFileGeneratorForTest;
+import org.apache.iotdb.tsfile.write.writer.IncompleteFileTestUtil;
+import org.junit.Test;
+
+public class TsFileRestorableReaderTest {
+
+  private static final String FILE_PATH = 
TsFileGeneratorForTest.outputDataFile;
+
+  @Test
+  public void testToReadDamagedFileAndRepair() throws IOException {
+    File file = new File(FILE_PATH);
+
+    IncompleteFileTestUtil.writeFileWithOneIncompleteChunkHeader(file);
+
+    TsFileSequenceReader reader = new TsFileRestorableReader(FILE_PATH, true);
+    String tailMagic = reader.readTailMagic();
+    reader.close();
+
+    // Check if the file was repaired
+    assertEquals(TSFileConfig.MAGIC_STRING, tailMagic);
+    assertTrue(file.delete());
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testToReadDamagedFileNoRepair() throws IOException {
+    File file = new File(FILE_PATH);
+
+    IncompleteFileTestUtil.writeFileWithOneIncompleteChunkHeader(file);
+    // This should throw an Illegal Argument Exception
+    TsFileSequenceReader reader = new TsFileRestorableReader(FILE_PATH, false);
+  }
+}
diff --git 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/read/TsFileSequenceReaderTest.java
 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/read/TsFileSequenceReaderTest.java
index e5c7f93..7918db9 100644
--- 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/read/TsFileSequenceReaderTest.java
+++ 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/read/TsFileSequenceReaderTest.java
@@ -113,28 +113,5 @@ public class TsFileSequenceReaderTest {
     reader.close();
   }
 
-  @Test
-  public void testToReadDamagedFileAndRepair() throws IOException {
-    File file = new File(FILE_PATH);
-
-    IncompleteFileTestUtil.writeFileWithOneIncompleteChunkHeader(file);
 
-    TsFileSequenceReader reader = new TsFileSequenceReader(FILE_PATH, true, 
true);
-    String tailMagic = reader.readTailMagic();
-    reader.close();
-
-    // Check if the file was repaired
-    assertEquals(TSFileConfig.MAGIC_STRING, tailMagic);
-    assertTrue(file.delete());
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testToReadDamagedFileNoRepair() throws IOException {
-    File file = new File(FILE_PATH);
-
-    IncompleteFileTestUtil.writeFileWithOneIncompleteChunkHeader(file);
-
-    // This should throw an Illegal Argument Exception
-    TsFileSequenceReader reader = new TsFileSequenceReader(FILE_PATH, true, 
false);
-  }
 }
\ No newline at end of file
diff --git 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/TsFileGeneratorForTest.java
 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/TsFileGeneratorForTest.java
index 757ff79..a2b6acd 100755
--- 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/TsFileGeneratorForTest.java
+++ 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/utils/TsFileGeneratorForTest.java
@@ -164,10 +164,33 @@ public class TsFileGeneratorForTest {
     innerWriter = new TsFileWriter(file, schema, 
TSFileDescriptor.getInstance().getConfig());
 
     // write
-    try {
-      writeToFile(schema);
+    try (Scanner in = new Scanner(new File(inputDataFile))) {
+      long lineCount = 0;
+      long startTime = System.currentTimeMillis();
+      long endTime = System.currentTimeMillis();
+      assert in != null;
+      while (in.hasNextLine()) {
+        if (lineCount % 1000000 == 0) {
+          endTime = System.currentTimeMillis();
+          // logger.info("write line:{},inner space consumer:{},use
+          // 
time:{}",lineCount,innerWriter.calculateMemSizeForEachGroup(),endTime);
+          LOG.info("write line:{},use time:{}s", lineCount, (endTime - 
startTime) / 1000);
+        }
+        String str = in.nextLine();
+        TSRecord record = RecordUtils.parseSimpleTupleRecord(str, schema);
+        innerWriter.write(record);
+        lineCount++;
+      }
+      endTime = System.currentTimeMillis();
+      LOG.info("write line:{},use time:{}s", lineCount, (endTime - startTime) 
/ 1000);
+      endTime = System.currentTimeMillis();
+      LOG.info("write total:{},use time:{}s", lineCount, (endTime - startTime) 
/ 1000);
+      LOG.info("src file size:{}GB", FileUtils.getLocalFileByte(inputDataFile, 
FileUtils.Unit.GB));
+      LOG.info("out file size:{}MB", 
FileUtils.getLocalFileByte(outputDataFile, FileUtils.Unit.MB));
     } catch (WriteProcessException e) {
       e.printStackTrace();
+    } finally {
+      innerWriter.close();
     }
     LOG.info("write to file successfully!!");
   }
@@ -233,43 +256,4 @@ public class TsFileGeneratorForTest {
     return schemaBuilder.build();
   }
 
-  static public void writeToFile(FileSchema schema)
-      throws InterruptedException, IOException, WriteProcessException {
-    Scanner in = getDataFile(inputDataFile);
-    long lineCount = 0;
-    long startTime = System.currentTimeMillis();
-    long endTime = System.currentTimeMillis();
-    assert in != null;
-    while (in.hasNextLine()) {
-      if (lineCount % 1000000 == 0) {
-        endTime = System.currentTimeMillis();
-        // logger.info("write line:{},inner space consumer:{},use
-        // 
time:{}",lineCount,innerWriter.calculateMemSizeForEachGroup(),endTime);
-        LOG.info("write line:{},use time:{}s", lineCount, (endTime - 
startTime) / 1000);
-      }
-      String str = in.nextLine();
-      TSRecord record = RecordUtils.parseSimpleTupleRecord(str, schema);
-      innerWriter.write(record);
-      lineCount++;
-    }
-    endTime = System.currentTimeMillis();
-    LOG.info("write line:{},use time:{}s", lineCount, (endTime - startTime) / 
1000);
-    innerWriter.close();
-    in.close();
-    endTime = System.currentTimeMillis();
-    LOG.info("write total:{},use time:{}s", lineCount, (endTime - startTime) / 
1000);
-    LOG.info("src file size:{}GB", FileUtils.getLocalFileByte(inputDataFile, 
FileUtils.Unit.GB));
-    LOG.info("out file size:{}MB", FileUtils.getLocalFileByte(outputDataFile, 
FileUtils.Unit.MB));
-  }
-
-  static private Scanner getDataFile(String path) {
-    File file = new File(path);
-    try {
-      Scanner in = new Scanner(file);
-      return in;
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-      return null;
-    }
-  }
 }
diff --git 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriterTest.java
 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriterTest.java
index 7fcb301..6ad7e1f 100644
--- 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriterTest.java
+++ 
b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/NativeRestorableIOWriterTest.java
@@ -27,14 +27,14 @@ import static org.junit.Assert.assertTrue;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.nio.ByteBuffer;
+import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
 import org.apache.iotdb.tsfile.file.MetaMarker;
-import org.apache.iotdb.tsfile.file.header.ChunkHeader;
 import org.apache.iotdb.tsfile.file.metadata.TsDeviceMetadataIndex;
 import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.iotdb.tsfile.file.metadata.statistics.FloatStatistics;
+import org.apache.iotdb.tsfile.read.TsFileCheckStatus;
 import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
 import org.apache.iotdb.tsfile.write.TsFileWriter;
 import org.apache.iotdb.tsfile.write.record.TSRecord;
@@ -47,7 +47,7 @@ public class NativeRestorableIOWriterTest {
 
   private static final String FILE_NAME = "test.ts";
 
-  @Test
+  @Test(expected = IOException.class)
   public void testBadHeadMagic() throws Exception {
     File file = new File(FILE_NAME);
     FileWriter fWriter = new FileWriter(file);
@@ -55,10 +55,9 @@ public class NativeRestorableIOWriterTest {
     fWriter.close();
     try {
       NativeRestorableIOWriter rWriter = new NativeRestorableIOWriter(file);
-    } catch (IOException e) {
-      assertTrue(e.getMessage().contains("is not using TsFile format"));
+    } finally {
+      assertTrue(file.delete());
     }
-    assertTrue(file.delete());
   }
 
   @Test
@@ -66,16 +65,16 @@ public class NativeRestorableIOWriterTest {
     File file = new File(FILE_NAME);
     TsFileWriter writer = new TsFileWriter(file);
     writer.getIOWriter().forceClose();
-    try {
-      NativeRestorableIOWriter rWriter = new NativeRestorableIOWriter(file, 
false);
-      writer = new TsFileWriter(rWriter);
-      writer.close();
-    } catch (IOException e) {
-      assertTrue(e.getMessage().contains("only has header"));
-    }
 
     NativeRestorableIOWriter rWriter = new NativeRestorableIOWriter(file);
-    assertEquals(TsFileIOWriter.magicStringBytes.length, 
rWriter.getTruncatedPosition());
+    writer = new TsFileWriter(rWriter);
+    writer.close();
+
+    rWriter = new NativeRestorableIOWriter(file);
+    assertEquals(TsFileCheckStatus.COMPLETE_FILE, 
rWriter.getTruncatedPosition());
+    assertFalse(rWriter.canWrite());
+    rWriter = new NativeRestorableIOWriter(file, true);
+    assertEquals(TSFileConfig.MAGIC_STRING.length(), 
rWriter.getTruncatedPosition());
     writer = new TsFileWriter(rWriter);
     writer.close();
     assertTrue(file.delete());
@@ -297,6 +296,20 @@ public class NativeRestorableIOWriterTest {
 
     NativeRestorableIOWriter rWriter = new NativeRestorableIOWriter(file);
     assertFalse(rWriter.canWrite());
+    rWriter.forceClose();
+
+    rWriter = new NativeRestorableIOWriter(file, true);
+    assertTrue(rWriter.canWrite());
+    writer = new TsFileWriter(rWriter);
+    writer.write(new TSRecord(3, "d1").addTuple(new FloatDataPoint("s1", 5))
+        .addTuple(new FloatDataPoint("s2", 4)));
+    writer.write(new TSRecord(4, "d1").addTuple(new FloatDataPoint("s1", 5))
+        .addTuple(new FloatDataPoint("s2", 4)));
+    writer.close();
+    TsFileSequenceReader reader = new TsFileSequenceReader(FILE_NAME);
+    TsDeviceMetadataIndex index = 
reader.readFileMetadata().getDeviceMap().get("d1");
+    assertEquals(2, 
reader.readTsDeviceMetaData(index).getChunkGroupMetaDataList().size());
+    reader.close();
     assertTrue(file.delete());
   }
 }
\ No newline at end of file

Reply via email to