[ 
https://issues.apache.org/jira/browse/GOBBLIN-1114?focusedWorklogId=420863&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-420863
 ]

ASF GitHub Bot logged work on GOBBLIN-1114:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 12/Apr/20 02:00
            Start Date: 12/Apr/20 02:00
    Worklog Time Spent: 10m 
      Work Description: sv2000 commented on pull request #2954: [GOBBLIN-1114] 
OrcValueMapper schema evolution up-conversion recursive
URL: https://github.com/apache/incubator-gobblin/pull/2954#discussion_r407132289
 
 

 ##########
 File path: 
gobblin-compaction/src/main/java/org/apache/gobblin/compaction/mapreduce/orc/OrcValueMapper.java
 ##########
 @@ -24,58 +24,80 @@
 
 import org.apache.gobblin.compaction.mapreduce.RecordKeyMapperBase;
 import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.orc.OrcConf;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.impl.ConvertTreeReaderFactory;
 import org.apache.orc.impl.SchemaEvolution;
 import org.apache.orc.mapred.OrcKey;
+import org.apache.orc.mapred.OrcList;
+import org.apache.orc.mapred.OrcMap;
 import org.apache.orc.mapred.OrcStruct;
+import org.apache.orc.mapred.OrcUnion;
 import org.apache.orc.mapred.OrcValue;
 import org.apache.orc.mapreduce.OrcMapreduceRecordReader;
 
+import com.google.common.annotations.VisibleForTesting;
+
+import lombok.extern.slf4j.Slf4j;
+
 
 /**
  * To keep consistent with {@link OrcMapreduceRecordReader}'s decision on 
implementing
  * {@link RecordReader} with {@link NullWritable} as the key and generic type 
of value, the ORC Mapper will
  * read in the record as the input value.
  */
+@Slf4j
 public class OrcValueMapper extends RecordKeyMapperBase<NullWritable, 
OrcStruct, Object, OrcValue> {
 
   private OrcValue outValue;
   private TypeDescription mapperSchema;
 
+  // This is added mostly for debuggability.
+  private static int writeCount = 0;
+
   @Override
   protected void setup(Context context)
       throws IOException, InterruptedException {
     super.setup(context);
     this.outValue = new OrcValue();
-    this.mapperSchema = 
TypeDescription.fromString(context.getConfiguration().get(OrcConf.MAPRED_INPUT_SCHEMA.getAttribute()));
+    this.mapperSchema =
+        
TypeDescription.fromString(context.getConfiguration().get(OrcConf.MAPRED_INPUT_SCHEMA.getAttribute()));
   }
 
   @Override
   protected void map(NullWritable key, OrcStruct orcStruct, Context context)
       throws IOException, InterruptedException {
-    OrcStruct upConvertedStruct = upConvertOrcStruct(orcStruct, context);
-    if (context.getNumReduceTasks() == 0) {
-      this.outValue.value = upConvertedStruct;
-      context.write(NullWritable.get(), this.outValue);
-    } else {
-      this.outValue.value = upConvertedStruct;
-      context.write(getDedupKey(upConvertedStruct), this.outValue);
+    OrcStruct upConvertedStruct = upConvertOrcStruct(orcStruct, mapperSchema);
+    try {
+      if (context.getNumReduceTasks() == 0) {
+        this.outValue.value = upConvertedStruct;
 
 Review comment:
   Move this line outside the if..else block, since it appears in both the if 
and the else branches.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 420863)
    Time Spent: 1h 10m  (was: 1h)

> ORC-mapred schema evolution enhancement
> ---------------------------------------
>
>                 Key: GOBBLIN-1114
>                 URL: https://issues.apache.org/jira/browse/GOBBLIN-1114
>             Project: Apache Gobblin
>          Issue Type: Improvement
>            Reporter: Lei Sun
>            Priority: Major
>          Time Spent: 1h 10m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to