xiarixiaoyao commented on a change in pull request #3203:
URL: https://github.com/apache/hudi/pull/3203#discussion_r718194228



##########
File path: hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
##########
@@ -119,6 +275,11 @@ void addProjectionToJobConf(final RealtimeSplit 
realtimeSplit, final JobConf job
     addProjectionToJobConf(realtimeSplit, jobConf);
     LOG.info("Creating record reader with readCols :" + 
jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)
         + ", Ids :" + 
jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));
+
+    // For a log-only split no parquet reader is needed, so pass an empty record reader instead.
+    if (realtimeSplit.getPath().toString().contains(".log.")) {
+      return new HoodieRealtimeRecordReader(realtimeSplit, jobConf, new 
HoodieEmptyRecordReader());
+    }

Review comment:
       agree

##########
File path: hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java
##########
@@ -55,13 +55,20 @@
   private final Set<String> deltaRecordKeys;
   private int recordKeyIndex = 
HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS;
   private Iterator<String> deltaItr;
+  private boolean logFileOnlySplit;
+  private HoodieMergedLogReader logReader;
 
   public RealtimeCompactedRecordReader(RealtimeSplit split, JobConf job,
       RecordReader<NullWritable, ArrayWritable> realReader) throws IOException 
{
     super(split, job);
     this.parquetReader = realReader;
-    this.deltaRecordMap = getMergedLogRecordScanner().getRecords();
+    HoodieMergedLogRecordScanner hoodieMergedLogRecordScanner = 
getMergedLogRecordScanner();
+    this.deltaRecordMap = hoodieMergedLogRecordScanner.getRecords();
     this.deltaRecordKeys = new HashSet<>(this.deltaRecordMap.keySet());
+    if (split.getPath().toString().contains(".log.")) {
+      this.logFileOnlySplit = true;

Review comment:
       agree




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to