n3nash commented on a change in pull request #1978:
URL: https://github.com/apache/hudi/pull/1978#discussion_r480453812
##########
File path: hudi-client/src/main/java/org/apache/hudi/index/hbase/HBaseIndex.java
##########
@@ -213,36 +215,61 @@ private boolean checkIfValidCommit(HoodieTableMetaClient
metaClient, String comm
statements.add(generateStatement(rec.getRecordKey()));
currentBatchOfRecords.add(rec);
// iterator till we reach batch size
- if (statements.size() >= multiGetBatchSize ||
!hoodieRecordIterator.hasNext()) {
- // get results for batch from Hbase
- Result[] results = doGet(hTable, statements);
- // clear statements to be GC'd
- statements.clear();
- for (Result result : results) {
- // first, attempt to grab location from HBase
- HoodieRecord currentRecord = currentBatchOfRecords.remove(0);
- if (result.getRow() != null) {
- String keyFromResult = Bytes.toString(result.getRow());
- String commitTs =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
- String fileId =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
- String partitionPath =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
-
- if (checkIfValidCommit(metaClient, commitTs)) {
- currentRecord = new HoodieRecord(new
HoodieKey(currentRecord.getRecordKey(), partitionPath),
+ if (hoodieRecordIterator.hasNext() && statements.size() <
multiGetBatchSize) {
+ continue;
+ }
+
+ // get results for batch from Hbase
+ Result[] results = doGet(hTable, statements);
+ // clear statements to be GC'd
+ statements.clear();
+ for (Result result : results) {
+ // first, attempt to grab location from HBase
+ HoodieRecord currentRecord = currentBatchOfRecords.remove(0);
+
+ if (result.getRow() == null) {
+ taggedRecords.add(currentRecord);
+ continue;
+ }
+
+ String keyFromResult = Bytes.toString(result.getRow());
+ String commitTs =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
+ String fileId =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
+ String partitionPath =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
+
+ if (!checkIfValidCommit(metaClient, commitTs)) {
+ // if commit is invalid, treat this as a new taggedRecord
+ taggedRecords.add(currentRecord);
+ continue;
+ }
+
+
+ if (updatePartitionPath &&
!partitionPath.equals(currentRecord.getPartitionPath())) {
Review comment:
It seems like only the following lines have changed, but the PR also changes
the indentation of other lines, making it hard to understand the exact changes.
Can you please rework this to change only what is intended?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]