This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 26f24b6  Removing OLD MAGIC header since a) it's no longer used b) causes issues when the data actually has OLD MAGIC
26f24b6 is described below

commit 26f24b6728343b2788125edf8de94a4800fd8c0e
Author: Nishith Agarwal <[email protected]>
AuthorDate: Fri Apr 19 11:21:45 2019 -0700

    Removing OLD MAGIC header since a) it's no longer used b) causes issues when the data actually has OLD MAGIC
---
 .../common/table/log/HoodieLogFileReader.java      | 39 ++---------
 .../hoodie/common/table/log/HoodieLogFormat.java   | 17 ++---
 .../common/table/log/HoodieLogFormatTest.java      | 79 +---------------------
 3 files changed, 10 insertions(+), 125 deletions(-)

diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFileReader.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFileReader.java
index 836870e..da0f518 100644
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFileReader.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFileReader.java
@@ -54,7 +54,6 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
 
   private final FSDataInputStream inputStream;
   private final HoodieLogFile logFile;
-  private static final byte[] oldMagicBuffer = new byte[4];
   private static final byte[] magicBuffer = new byte[6];
   private final Schema readerSchema;
   private HoodieLogFormat.LogFormatVersion nextBlockVersion;
@@ -121,23 +120,11 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
     Map<HeaderMetadataType, String> header = null;
 
     try {
-      if (isOldMagic()) {
-        // 1 Read the block type for a log block
-        type = inputStream.readInt();
-
-        Preconditions.checkArgument(type < HoodieLogBlockType.values().length,
-            "Invalid block byte type found " + type);
-        blockType = HoodieLogBlockType.values()[type];
-
-        // 2 Read the total size of the block
-        blocksize = inputStream.readInt();
-      } else {
-        // 1 Read the total size of the block
-        blocksize = (int) inputStream.readLong();
-      }
+      // 1 Read the total size of the block
+      blocksize = (int) inputStream.readLong();
     } catch (EOFException | CorruptedLogFileException e) {
       // An exception reading any of the above indicates a corrupt block
-      // Create a corrupt block by finding the next OLD_MAGIC marker or EOF
+      // Create a corrupt block by finding the next MAGIC marker or EOF
       return createCorruptBlock();
     }
 
@@ -297,22 +284,12 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
   }
 
   /**
-   * Read log format version from log file, if present For old log files written with Magic header
-   * OLD_MAGIC and without version, return DEFAULT_VERSION
+   * Read log format version from log file.
    */
   private HoodieLogFormat.LogFormatVersion readVersion() throws IOException {
-    // If not old log file format (written with Magic header OLD_MAGIC), then read log version
-    if (Arrays.equals(oldMagicBuffer, HoodieLogFormat.OLD_MAGIC)) {
-      Arrays.fill(oldMagicBuffer, (byte) 0);
-      return new HoodieLogFormatVersion(HoodieLogFormatVersion.DEFAULT_VERSION);
-    }
     return new HoodieLogFormatVersion(inputStream.readInt());
   }
 
-  private boolean isOldMagic() {
-    return Arrays.equals(oldMagicBuffer, HoodieLogFormat.OLD_MAGIC);
-  }
-
 
   private boolean readMagic() throws IOException {
     try {
@@ -334,13 +311,7 @@ class HoodieLogFileReader implements HoodieLogFormat.Reader {
     // 1. Read magic header from the start of the block
     inputStream.readFully(magicBuffer, 0, 6);
     if (!Arrays.equals(magicBuffer, HoodieLogFormat.MAGIC)) {
-      inputStream.seek(pos);
-      // 1. Read old magic header from the start of the block
-      // (for backwards compatibility of older log files written without log version)
-      inputStream.readFully(oldMagicBuffer, 0, 4);
-      if (!Arrays.equals(oldMagicBuffer, HoodieLogFormat.OLD_MAGIC)) {
-        return false;
-      }
+      return false;
     }
     return true;
   }
diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java
index 30d0093..93e5ac7 100644
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java
@@ -29,25 +29,16 @@ import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 
 /**
- * File Format for Hoodie Log Files. The File Format consists of blocks each seperated with a
- * OLD_MAGIC sync marker. A Block can either be a Data block, Command block or Delete Block. Data
+ * File Format for Hoodie Log Files. The File Format consists of blocks each separated with a
+ * MAGIC sync marker. A Block can either be a Data block, Command block or Delete Block. Data
  * Block - Contains log records serialized as Avro Binary Format Command Block - Specific commands
- * like RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of
+ * like ROLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of
  * keys to delete - tombstone for keys
  */
 public interface HoodieLogFormat {
 
   /**
-   * Magic 4 bytes we put at the start of every block in the log file. Sync marker. We could make
-   * this file specific (generate a random 4 byte magic and stick it in the file header), but this I
-   * think is suffice for now - PR
-   */
-  byte[] OLD_MAGIC = new byte[] {'H', 'U', 'D', 'I'};
-
-  /**
-   * Magic 6 bytes we put at the start of every block in the log file. This is added to maintain
-   * backwards compatiblity due to lack of log format/block version in older log files. All new log
-   * block will now write this OLD_MAGIC value
+   * Magic 6 bytes we put at the start of every block in the log file.
    */
   byte[] MAGIC = new byte[] {'#', 'H', 'U', 'D', 'I', '#'};
 
diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java b/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java
index cc795ff..602840c 100644
--- a/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java
+++ b/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java
@@ -1057,83 +1057,6 @@ public class HoodieLogFormatTest {
     assertEquals("We would read 0 records", 0, scanner.getTotalLogRecords());
   }
 
-  @Test
-  public void testMagicAndLogVersionsBackwardsCompatibility()
-      throws IOException, InterruptedException, URISyntaxException {
-    // Create the log file
-    Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
-        .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
-        .overBaseCommit("100").withFs(fs).build();
-    Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
-    List<IndexedRecord> records = SchemaTestUtil.generateHoodieTestRecords(0, 100);
-    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
-    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
-    writer.close();
-
-    // Write 1 with OLD_MAGIC and no log format version
-    // Append a log block to end of the log (mimics a log block with old format
-    // fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
-    FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
-    // create a block with
-    outputStream.write(HoodieLogFormat.OLD_MAGIC);
-    outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
-    // Write out a length that does not confirm with the content
-    records = SchemaTestUtil.generateHoodieTestRecords(0, 100);
-    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, new String(HoodieAvroUtils.compress(schema.toString())));
-    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, schema);
-    byte[] content = dataBlock.getBytes(schema);
-    outputStream.writeInt(content.length);
-    // Write out some content
-    outputStream.write(content);
-    outputStream.flush();
-    outputStream.hflush();
-    outputStream.close();
-
-    writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
-        .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
-        .withFs(fs).build();
-
-    // Write 2 with MAGIC and latest log format version
-    records = SchemaTestUtil.generateHoodieTestRecords(0, 100);
-    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
-    dataBlock = new HoodieAvroDataBlock(records, header);
-    writer = writer.appendBlock(dataBlock);
-
-    // Write 3 with MAGIC and latest log format version
-    writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
-        .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
-        .withFs(fs).build();
-    records = SchemaTestUtil.generateHoodieTestRecords(0, 100);
-    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
-    dataBlock = new HoodieAvroDataBlock(records, header);
-    writer = writer.appendBlock(dataBlock);
-    writer.close();
-
-    Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), schema);
-
-    // Read the first block written with latest version and magic
-    reader.hasNext();
-    HoodieLogBlock block = reader.next();
-    assertEquals(block.getBlockType(), HoodieLogBlockType.AVRO_DATA_BLOCK);
-    HoodieAvroDataBlock dBlock = (HoodieAvroDataBlock) block;
-    assertEquals(dBlock.getRecords().size(), 100);
-
-    // Read second block written with old magic and no version
-    reader.hasNext();
-    block = reader.next();
-    assertEquals(block.getBlockType(), HoodieLogBlockType.AVRO_DATA_BLOCK);
-    dBlock = (HoodieAvroDataBlock) block;
-    assertEquals(dBlock.getRecords().size(), 100);
-
-    //Read third block written with latest version and magic
-    reader.hasNext();
-    block = reader.next();
-    assertEquals(block.getBlockType(), HoodieLogBlockType.AVRO_DATA_BLOCK);
-    dBlock = (HoodieAvroDataBlock) block;
-    assertEquals(dBlock.getRecords().size(), 100);
-    reader.close();
-  }
-
   @SuppressWarnings("unchecked")
   @Test
   public void testBasicAppendAndReadInReverse() throws IOException, URISyntaxException, InterruptedException {
@@ -1222,7 +1145,7 @@ public class HoodieLogFormatTest {
     fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
     FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
     // create a block with
-    outputStream.write(HoodieLogFormat.OLD_MAGIC);
+    outputStream.write(HoodieLogFormat.MAGIC);
     outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
     // Write out a length that does not confirm with the content
     outputStream.writeInt(1000);

Reply via email to