alexeykudinkin commented on code in PR #6782:
URL: https://github.com/apache/hudi/pull/6782#discussion_r1054824916
##########
hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java:
##########
@@ -188,40 +179,41 @@ protected AbstractHoodieLogRecordReader(FileSystem fs, String basePath, List<Str
this.path = basePath;
this.useScanV2 = useScanV2;
- // Key fields when populate meta fields is disabled (that is, virtual keys enabled)
- if (!tableConfig.populateMetaFields()) {
- this.populateMetaFields = false;
- this.simpleKeyGenFields = Option.of(
- Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()));
- }
- this.partitionName = partitionName;
- }
+ if (keyFieldOverride.isPresent()) {
+ // TODO elaborate
+ checkState(partitionNameOverride.isPresent());
- protected String getKeyField() {
- if (this.populateMetaFields) {
- return HoodieRecord.RECORD_KEY_METADATA_FIELD;
+ this.populateMetaFields = false;
+ this.recordKeyField = keyFieldOverride.get();
+ this.partitionPathFieldOpt = Option.empty();
+ } else if (tableConfig.populateMetaFields()) {
+ this.populateMetaFields = true;
+ this.recordKeyField = HoodieRecord.RECORD_KEY_METADATA_FIELD;
+ this.partitionPathFieldOpt = Option.of(HoodieRecord.PARTITION_PATH_METADATA_FIELD);
Review Comment:
SG (sounds good)
##########
hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java:
##########
@@ -106,30 +109,85 @@ protected HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List<Stri
}
}
- protected void performScan() {
+ /**
+ * Scans delta-log files processing blocks
+ */
+ public final void scan() {
+ scan(false);
+ }
+
+ public final void scan(boolean skipProcessingBlocks) {
+ scanInternal(Option.empty(), skipProcessingBlocks);
+ }
+
+ /**
+ * Provides incremental scanning capability where only provided keys will be looked
+ * up in the delta-log files, scanned and subsequently materialized into the internal
+ * cache
+ *
+ * @param keys to be looked up
+ */
+ public void scanByFullKeys(List<String> keys) {
+ if (forceFullScan) {
+ return; // no-op
+ }
+
+ List<String> missingKeys = keys.stream()
+ .filter(key -> !records.containsKey(key))
+ .collect(Collectors.toList());
+
+ if (missingKeys.isEmpty()) {
+ // All the required records are already fetched, no-op
+ return;
+ }
+
+ scanInternal(Option.of(KeySpec.fullKeySpec(missingKeys)), false);
+ }
+
+ /**
+ * Provides incremental scanning capability where only keys matching provided key-prefixes
+ * will be looked up in the delta-log files, scanned and subsequently materialized into
+ * the internal cache
+ *
+ * @param keyPrefixes to be looked up
+ */
+ public void scanByKeyPrefixes(List<String> keyPrefixes) {
+ // TODO add caching for queried prefixes
Review Comment:
It's actually a fairly simple one (we just need to know whether a particular
prefix has already been processed, in which case we can respond directly).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]