This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 09a7953f125 [HUDI-6092] Reuse schema objects while deserializing log blocks (#8484)
09a7953f125 is described below

commit 09a7953f12535be9be746809fb79fd5f23df083f
Author: Prashant Wason <[email protected]>
AuthorDate: Sat Apr 22 01:35:57 2023 -0700

    [HUDI-6092] Reuse schema objects while deserializing log blocks (#8484)
---
 .../apache/hudi/io/storage/TestHoodieHFileReaderWriter.java    |  2 +-
 .../hudi/common/table/log/block/HoodieAvroDataBlock.java       |  4 +---
 .../apache/hudi/common/table/log/block/HoodieDataBlock.java    | 10 ++++++++++
 .../hudi/common/table/log/block/HoodieHFileDataBlock.java      |  8 +++-----
 .../java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java |  4 ++--
 5 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
index 815b04c9dbc..70d2ebfec03 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
@@ -112,7 +112,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
   protected HoodieAvroFileReader createReader(
       Configuration conf) throws Exception {
     CacheConfig cacheConfig = new CacheConfig(conf);
-    return new HoodieAvroHFileReader(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf));
+    return new HoodieAvroHFileReader(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf), Option.empty());
   }
 
   @Override
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
index 752b4db3182..992aa3881b6 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
@@ -167,9 +167,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
     }
 
     public static RecordIterator getInstance(HoodieAvroDataBlock dataBlock, byte[] content) throws IOException {
-      // Get schema from the header
-      Schema writerSchema = new Schema.Parser().parse(dataBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
-      return new RecordIterator(dataBlock.readerSchema, writerSchema, content);
+      return new RecordIterator(dataBlock.readerSchema, dataBlock.getSchemaFromHeader(), content);
     }
 
     @Override
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
index d694eb5bf62..b0e885d1252 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
@@ -33,6 +33,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Function;
 
 import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
@@ -62,6 +63,9 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
 
   protected Schema readerSchema;
 
+  //  Map of string schema to parsed schema.
+  private static ConcurrentHashMap<String, Schema> schemaMap = new ConcurrentHashMap<>();
+
   /**
    * NOTE: This ctor is used on the write-path (ie when records ought to be written into the log)
    */
@@ -194,6 +198,12 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
     return Option.ofNullable(record.getRecordKey(readerSchema, keyFieldName));
   }
 
+  protected Schema getSchemaFromHeader() {
+    String schemaStr = getLogBlockHeader().get(HeaderMetadataType.SCHEMA);
+    schemaMap.computeIfAbsent(schemaStr, (schemaString) -> new Schema.Parser().parse(schemaString));
+    return schemaMap.get(schemaStr);
+  }
+
   /**
    * Converts the given list to closable iterator.
    */
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
index 2faa9dda4db..a948ff2344d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
@@ -166,12 +166,9 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
   protected <T> ClosableIterator<HoodieRecord<T>> deserializeRecords(byte[] content, HoodieRecordType type) throws IOException {
     checkState(readerSchema != null, "Reader's schema has to be non-null");
 
-    // Get schema from the header
-    Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
-
     FileSystem fs = FSUtils.getFs(pathForReader.toString(), FSUtils.buildInlineConf(getBlockContentLocation().get().getHadoopConf()));
     // Read the content
-    HoodieAvroHFileReader reader = new HoodieAvroHFileReader(fs, pathForReader, content, Option.of(writerSchema));
+    HoodieAvroHFileReader reader = new HoodieAvroHFileReader(fs, pathForReader, content, Option.of(getSchemaFromHeader()));
     return unsafeCast(reader.getRecordIterator(readerSchema));
   }
 
@@ -196,7 +193,8 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
     Collections.sort(sortedKeys);
 
     final HoodieAvroHFileReader reader =
-             new HoodieAvroHFileReader(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf));
+             new HoodieAvroHFileReader(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf),
+             Option.of(getSchemaFromHeader()));
 
     // Get writer's schema from the header
     final ClosableIterator<HoodieRecord<IndexedRecord>> recordIterator =
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
index 60440bcbf04..1a9da7e3b8f 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
@@ -102,8 +102,8 @@ public class HoodieAvroHFileReader extends HoodieAvroFileReaderBase implements H
         Option.empty());
   }
 
-  public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException {
-    this(path, HoodieHFileUtils.createHFileReader(fs, path, cacheConfig, hadoopConf), Option.empty());
+  public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs, Option<Schema> schemaOpt) throws IOException {
+    this(path, HoodieHFileUtils.createHFileReader(fs, path, cacheConfig, hadoopConf), schemaOpt);
   }
 
   public HoodieAvroHFileReader(FileSystem fs, Path dummyPath, byte[] content, Option<Schema> schemaOpt) throws IOException {

Reply via email to