deniskuzZ commented on code in PR #6074: URL: https://github.com/apache/hive/pull/6074#discussion_r2355774779
########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/writer/HiveIcebergRecordWriter.java: ########## @@ -20,34 +20,131 @@ package org.apache.iceberg.mr.hive.writer; import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; import org.apache.hadoop.io.Writable; import org.apache.iceberg.DataFile; import org.apache.iceberg.Table; +import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.data.Record; import org.apache.iceberg.io.DataWriteResult; import org.apache.iceberg.io.OutputFileFactory; import org.apache.iceberg.mr.hive.FilesForCommit; import org.apache.iceberg.mr.hive.writer.WriterBuilder.Context; import org.apache.iceberg.mr.mapred.Container; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; class HiveIcebergRecordWriter extends HiveIcebergWriterBase { private final int currentSpecId; + private final Set<String> missingColumns; HiveIcebergRecordWriter(Table table, HiveFileWriterFactory fileWriterFactory, - OutputFileFactory dataFileFactory, Context context) { + OutputFileFactory dataFileFactory, Context context, String missingColumns) { super(table, newDataWriter(table, fileWriterFactory, dataFileFactory, context)); this.currentSpecId = table.spec().specId(); + this.missingColumns = Optional.ofNullable(missingColumns) + .map(columns -> Arrays.stream(columns.split(",")).collect(Collectors.toCollection(HashSet::new))) + .orElse(Sets.newHashSet()); } @Override public void write(Writable row) throws IOException { Record record = ((Container<Record>) row).get(); + setDefault(specs.get(currentSpecId).schema().asStruct().fields(), record, missingColumns); + writer.write(record, specs.get(currentSpecId), partition(record, currentSpecId)); } + private static 
void setDefault(List<Types.NestedField> fields, Record record, Set<String> missingColumns) { Review Comment: i think this helper method should be extracted from the writer ########## iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/writer/HiveIcebergRecordWriter.java: ########## @@ -20,34 +20,131 @@ package org.apache.iceberg.mr.hive.writer; import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; import org.apache.hadoop.io.Writable; import org.apache.iceberg.DataFile; import org.apache.iceberg.Table; +import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.data.Record; import org.apache.iceberg.io.DataWriteResult; import org.apache.iceberg.io.OutputFileFactory; import org.apache.iceberg.mr.hive.FilesForCommit; import org.apache.iceberg.mr.hive.writer.WriterBuilder.Context; import org.apache.iceberg.mr.mapred.Container; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; class HiveIcebergRecordWriter extends HiveIcebergWriterBase { private final int currentSpecId; + private final Set<String> missingColumns; HiveIcebergRecordWriter(Table table, HiveFileWriterFactory fileWriterFactory, - OutputFileFactory dataFileFactory, Context context) { + OutputFileFactory dataFileFactory, Context context, String missingColumns) { super(table, newDataWriter(table, fileWriterFactory, dataFileFactory, context)); this.currentSpecId = table.spec().specId(); + this.missingColumns = Optional.ofNullable(missingColumns) + .map(columns -> Arrays.stream(columns.split(",")).collect(Collectors.toCollection(HashSet::new))) + .orElse(Sets.newHashSet()); } @Override public void write(Writable row) throws IOException { Record record = ((Container<Record>) row).get(); + 
setDefault(specs.get(currentSpecId).schema().asStruct().fields(), record, missingColumns); + writer.write(record, specs.get(currentSpecId), partition(record, currentSpecId)); } + private static void setDefault(List<Types.NestedField> fields, Record record, Set<String> missingColumns) { Review Comment: I think this helper method should be extracted from the writer -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org