manojpec commented on a change in pull request #4352:
URL: https://github.com/apache/hudi/pull/4352#discussion_r789072971
##########
File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java
##########
@@ -46,52 +50,54 @@
private static final Logger LOG =
LogManager.getLogger(HoodieKeyLookupHandle.class);
- private final HoodieTableType tableType;
-
private final BloomFilter bloomFilter;
-
private final List<String> candidateRecordKeys;
-
+ private final boolean useMetadataTableIndex;
+ private Option<String> fileName = Option.empty();
private long totalKeysChecked;
public HoodieKeyLookupHandle(HoodieWriteConfig config, HoodieTable<T, I, K,
O> hoodieTable,
- Pair<String, String> partitionPathFilePair) {
- super(config, null, hoodieTable, partitionPathFilePair);
- this.tableType = hoodieTable.getMetaClient().getTableType();
+ Pair<String, String> partitionPathFileIDPair) {
+ this(config, hoodieTable, partitionPathFileIDPair, Option.empty(), false);
+ }
+
+ public HoodieKeyLookupHandle(HoodieWriteConfig config, HoodieTable<T, I, K,
O> hoodieTable,
+ Pair<String, String> partitionPathFileIDPair,
Option<String> fileName,
+ boolean useMetadataTableIndex) {
+ super(config, null, hoodieTable, partitionPathFileIDPair);
this.candidateRecordKeys = new ArrayList<>();
this.totalKeysChecked = 0;
- HoodieTimer timer = new HoodieTimer().startTimer();
-
- try {
- this.bloomFilter = createNewFileReader().readBloomFilter();
- } catch (IOException e) {
- throw new HoodieIndexException(String.format("Error reading bloom filter
from %s: %s", partitionPathFilePair, e));
+ if (fileName.isPresent()) {
+
ValidationUtils.checkArgument(FSUtils.getFileId(fileName.get()).equals(getFileId()));
+ this.fileName = fileName;
}
- LOG.info(String.format("Read bloom filter from %s in %d ms",
partitionPathFilePair, timer.endTimer()));
+ this.useMetadataTableIndex = useMetadataTableIndex;
+ this.bloomFilter = getBloomFilter();
}
- /**
- * Given a list of row keys and one file, return only row keys existing in
that file.
- */
- public List<String> checkCandidatesAgainstFile(Configuration configuration,
List<String> candidateRecordKeys,
- Path filePath) throws
HoodieIndexException {
- List<String> foundRecordKeys = new ArrayList<>();
+ private BloomFilter getBloomFilter() {
+ BloomFilter bloomFilter = null;
+ HoodieTimer timer = new HoodieTimer().startTimer();
try {
- // Load all rowKeys from the file, to double-confirm
- if (!candidateRecordKeys.isEmpty()) {
- HoodieTimer timer = new HoodieTimer().startTimer();
- Set<String> fileRowKeys = createNewFileReader().filterRowKeys(new
HashSet<>(candidateRecordKeys));
- foundRecordKeys.addAll(fileRowKeys);
- LOG.info(String.format("Checked keys against file %s, in %d ms.
#candidates (%d) #found (%d)", filePath,
- timer.endTimer(), candidateRecordKeys.size(),
foundRecordKeys.size()));
- if (LOG.isDebugEnabled()) {
- LOG.debug("Keys matching for file " + filePath + " => " +
foundRecordKeys);
+ if (this.useMetadataTableIndex) {
+ ValidationUtils.checkArgument(this.fileName.isPresent());
Review comment:
Fixed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]