This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 09a7953f125 [HUDI-6092] Reuse schema objects while deserializing log blocks (#8484)
09a7953f125 is described below
commit 09a7953f12535be9be746809fb79fd5f23df083f
Author: Prashant Wason <[email protected]>
AuthorDate: Sat Apr 22 01:35:57 2023 -0700
[HUDI-6092] Reuse schema objects while deserializing log blocks (#8484)
---
.../apache/hudi/io/storage/TestHoodieHFileReaderWriter.java | 2 +-
.../hudi/common/table/log/block/HoodieAvroDataBlock.java | 4 +---
.../apache/hudi/common/table/log/block/HoodieDataBlock.java | 10 ++++++++++
.../hudi/common/table/log/block/HoodieHFileDataBlock.java | 8 +++-----
.../java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java | 4 ++--
5 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
index 815b04c9dbc..70d2ebfec03 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
@@ -112,7 +112,7 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
protected HoodieAvroFileReader createReader(
Configuration conf) throws Exception {
CacheConfig cacheConfig = new CacheConfig(conf);
- return new HoodieAvroHFileReader(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf));
+ return new HoodieAvroHFileReader(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf), Option.empty());
}
@Override
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
index 752b4db3182..992aa3881b6 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java
@@ -167,9 +167,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock {
}
public static RecordIterator getInstance(HoodieAvroDataBlock dataBlock, byte[] content) throws IOException {
- // Get schema from the header
- Schema writerSchema = new Schema.Parser().parse(dataBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
- return new RecordIterator(dataBlock.readerSchema, writerSchema, content);
+ return new RecordIterator(dataBlock.readerSchema, dataBlock.getSchemaFromHeader(), content);
}
@Override
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
index d694eb5bf62..b0e885d1252 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java
@@ -33,6 +33,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
@@ -62,6 +63,9 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
protected Schema readerSchema;
+ // Map of string schema to parsed schema.
+ private static ConcurrentHashMap<String, Schema> schemaMap = new ConcurrentHashMap<>();
+
/**
* NOTE: This ctor is used on the write-path (ie when records ought to be written into the log)
*/
@@ -194,6 +198,12 @@ public abstract class HoodieDataBlock extends HoodieLogBlock {
return Option.ofNullable(record.getRecordKey(readerSchema, keyFieldName));
}
+ protected Schema getSchemaFromHeader() {
+ String schemaStr = getLogBlockHeader().get(HeaderMetadataType.SCHEMA);
+ schemaMap.computeIfAbsent(schemaStr, (schemaString) -> new Schema.Parser().parse(schemaString));
+ return schemaMap.get(schemaStr);
+ }
+
/**
* Converts the given list to closable iterator.
*/
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
index 2faa9dda4db..a948ff2344d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
@@ -166,12 +166,9 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
protected <T> ClosableIterator<HoodieRecord<T>> deserializeRecords(byte[] content, HoodieRecordType type) throws IOException {
checkState(readerSchema != null, "Reader's schema has to be non-null");
- // Get schema from the header
- Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
-
FileSystem fs = FSUtils.getFs(pathForReader.toString(),
FSUtils.buildInlineConf(getBlockContentLocation().get().getHadoopConf()));
// Read the content
- HoodieAvroHFileReader reader = new HoodieAvroHFileReader(fs, pathForReader, content, Option.of(writerSchema));
+ HoodieAvroHFileReader reader = new HoodieAvroHFileReader(fs, pathForReader, content, Option.of(getSchemaFromHeader()));
return unsafeCast(reader.getRecordIterator(readerSchema));
}
@@ -196,7 +193,8 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
Collections.sort(sortedKeys);
final HoodieAvroHFileReader reader =
- new HoodieAvroHFileReader(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf));
+ new HoodieAvroHFileReader(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf),
+ Option.of(getSchemaFromHeader()));
// Get writer's schema from the header
final ClosableIterator<HoodieRecord<IndexedRecord>> recordIterator =
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
index 60440bcbf04..1a9da7e3b8f 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java
@@ -102,8 +102,8 @@ public class HoodieAvroHFileReader extends HoodieAvroFileReaderBase implements H
Option.empty());
}
- public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException {
- this(path, HoodieHFileUtils.createHFileReader(fs, path, cacheConfig, hadoopConf), Option.empty());
+ public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs, Option<Schema> schemaOpt) throws IOException {
+ this(path, HoodieHFileUtils.createHFileReader(fs, path, cacheConfig, hadoopConf), schemaOpt);
}
public HoodieAvroHFileReader(FileSystem fs, Path dummyPath, byte[] content, Option<Schema> schemaOpt) throws IOException {