yihua commented on a change in pull request #5004:
URL: https://github.com/apache/hudi/pull/5004#discussion_r830386533
##########
File path:
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
##########
@@ -263,7 +272,7 @@ public BloomFilter readBloomFilter() {
public ClosableIterator<R> getRecordIterator(List<String> keys, Schema
schema) throws IOException {
this.schema = schema;
- reader.loadFileInfo();
+ reader.getHFileInfo();
Review comment:
This is removed now.
##########
File path:
hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java
##########
@@ -369,7 +369,8 @@ private Path getRandomInlinePath() {
private void verifyFileStatus(FileStatus expected, Path inlinePath, long
expectedLength, FileStatus actual) {
assertEquals(inlinePath, actual.getPath());
assertEquals(expectedLength, actual.getLen());
- assertEquals(expected.getAccessTime(), actual.getAccessTime());
+ // removing below assertion as it is flaky on rare occasion (difference is
in single-digit ms)
Review comment:
I created a separate PR, #5069, for this so as not to overload this PR.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
##########
@@ -250,7 +259,7 @@ public BloomFilter readBloomFilter() {
*/
public List<Pair<String, R>> readRecords(List<String> keys, Schema schema)
throws IOException {
this.schema = schema;
- reader.loadFileInfo();
+ reader.getHFileInfo();
Review comment:
This is removed now.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
##########
@@ -80,45 +85,49 @@
public HoodieHFileReader(Configuration configuration, Path path, CacheConfig
cacheConfig) throws IOException {
this.conf = configuration;
this.path = path;
- this.reader = HFile.createReader(FSUtils.getFs(path.toString(),
configuration), path, cacheConfig, conf);
+ this.reader = HFile.createReader(FSUtils.getFs(path.toString(),
configuration), path, cacheConfig, true, conf);
}
public HoodieHFileReader(Configuration configuration, Path path, CacheConfig
cacheConfig, FileSystem fs) throws IOException {
this.conf = configuration;
this.path = path;
this.fsDataInputStream = fs.open(path);
- this.reader = HFile.createReader(fs, path, cacheConfig, configuration);
+ this.reader = HFile.createReader(fs, path, cacheConfig, true,
configuration);
}
public HoodieHFileReader(byte[] content) throws IOException {
Configuration conf = new Configuration();
Path path = new Path("hoodie");
SeekableByteArrayInputStream bis = new
SeekableByteArrayInputStream(content);
FSDataInputStream fsdis = new FSDataInputStream(bis);
- this.reader = HFile.createReader(FSUtils.getFs("hoodie", conf), path, new
FSDataInputStreamWrapper(fsdis),
- content.length, new CacheConfig(conf), conf);
+ FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis);
+ FileSystem fs = FSUtils.getFs("hoodie", conf);
+ HFileSystem hfs = (fs instanceof HFileSystem) ? (HFileSystem) fs : new
HFileSystem(fs);
+ ReaderContext context = new ReaderContextBuilder()
+ .withFilePath(path)
+ .withInputStreamWrapper(stream)
+ .withFileSize(content.length)
+ .withFileSystem(hfs)
+ .withPrimaryReplicaReader(true)
+ .withReaderType(ReaderContext.ReaderType.STREAM)
+ .build();
+ HFileInfo fileInfo = new HFileInfo(context, conf);
+ this.reader = HFile.createReader(context, fileInfo, new CacheConfig(conf),
conf);
+ fileInfo.initMetaAndIndex(reader);
}
@Override
public String[] readMinMaxRecordKeys() {
- try {
- Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
- return new String[] { new
String(fileInfo.get(KEY_MIN_RECORD.getBytes())),
- new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))};
- } catch (IOException e) {
- throw new HoodieException("Could not read min/max record key out of file
information block correctly from path", e);
- }
+ HFileInfo fileInfo = reader.getHFileInfo();
Review comment:
I added more unit tests around the HFile reader.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]