SourabhBadhya commented on code in PR #5254:
URL: https://github.com/apache/hive/pull/5254#discussion_r1701682119
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergAcidUtil.java:
##########
@@ -252,13 +319,96 @@ public boolean hasNext() {
public T next() {
T next = currentIterator.next();
GenericRecord rec = (GenericRecord) next;
+ IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current);
PositionDeleteInfo.setIntoConf(conf,
IcebergAcidUtil.parseSpecId(rec),
IcebergAcidUtil.computePartitionHash(rec),
IcebergAcidUtil.parseFilePath(rec),
- IcebergAcidUtil.parseFilePosition(rec));
- IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current);
+ IcebergAcidUtil.parseFilePosition(rec),
+ StringUtils.EMPTY);
+ return (T) current;
+ }
+ }
+
+ public static class MergeTaskVirtualColumnAwareIterator<T> implements CloseableIterator<T> {
+
+ private final CloseableIterator<T> currentIterator;
+ private GenericRecordBuilder<T> recordBuilder;
+ private final Schema expectedSchema;
+ private final Configuration conf;
+ private final PartitionSpec partitionSpec;
+ private final StructLike partition;
+
+ public MergeTaskVirtualColumnAwareIterator(CloseableIterator<T> currentIterator,
+ Schema expectedSchema, Configuration conf, ContentFile contentFile,
+ Table table) {
+ this.currentIterator = currentIterator;
+ this.expectedSchema = expectedSchema;
+ this.conf = conf;
+ this.partition = contentFile.partition();
+ this.recordBuilder = new GenericRecordBuilder<>(
+ new Schema(expectedSchema.columns().subList(0, expectedSchema.columns().size())));
+ this.partitionSpec = table.specs().get(contentFile.specId());
+ }
+
+ @Override
+ public void close() throws IOException {
+ currentIterator.close();
+ }
+
+ @Override
+ public boolean hasNext() {
+ return currentIterator.hasNext();
+ }
+
+ @Override
+ public T next() {
+ T next = currentIterator.next();
+ GenericRecord rec = (GenericRecord) next;
+ return recordBuilder.withSpecId(partitionSpec.specId())
+ .withPartitionHash(computeHash(partition))
+ .withFilePath(IcebergAcidUtil.getFilePath(rec))
+ .withFilePosition(IcebergAcidUtil.getDeleteFilePosition(rec))
+ .withPartitionKey(getSerializedPartitionKey(partition, partitionSpec)).build();
+ }
+ }
+
+ public static class GenericRecordBuilder<T> {
Review Comment:
Done.
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergAcidUtil.java:
##########
@@ -252,13 +319,96 @@ public boolean hasNext() {
public T next() {
T next = currentIterator.next();
GenericRecord rec = (GenericRecord) next;
+ IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current);
PositionDeleteInfo.setIntoConf(conf,
IcebergAcidUtil.parseSpecId(rec),
IcebergAcidUtil.computePartitionHash(rec),
IcebergAcidUtil.parseFilePath(rec),
- IcebergAcidUtil.parseFilePosition(rec));
- IcebergAcidUtil.copyFields(rec, FILE_READ_META_COLS.size(), current.size(), current);
+ IcebergAcidUtil.parseFilePosition(rec),
+ StringUtils.EMPTY);
+ return (T) current;
+ }
+ }
+
+ public static class MergeTaskVirtualColumnAwareIterator<T> implements CloseableIterator<T> {
+
+ private final CloseableIterator<T> currentIterator;
+ private GenericRecordBuilder<T> recordBuilder;
+ private final Schema expectedSchema;
+ private final Configuration conf;
+ private final PartitionSpec partitionSpec;
+ private final StructLike partition;
+
+ public MergeTaskVirtualColumnAwareIterator(CloseableIterator<T> currentIterator,
+ Schema expectedSchema, Configuration conf, ContentFile contentFile,
+ Table table) {
+ this.currentIterator = currentIterator;
+ this.expectedSchema = expectedSchema;
+ this.conf = conf;
+ this.partition = contentFile.partition();
+ this.recordBuilder = new GenericRecordBuilder<>(
+ new Schema(expectedSchema.columns().subList(0, expectedSchema.columns().size())));
+ this.partitionSpec = table.specs().get(contentFile.specId());
+ }
+
+ @Override
+ public void close() throws IOException {
+ currentIterator.close();
+ }
+
+ @Override
+ public boolean hasNext() {
+ return currentIterator.hasNext();
+ }
+
+ @Override
+ public T next() {
+ T next = currentIterator.next();
+ GenericRecord rec = (GenericRecord) next;
+ return recordBuilder.withSpecId(partitionSpec.specId())
+ .withPartitionHash(computeHash(partition))
+ .withFilePath(IcebergAcidUtil.getFilePath(rec))
+ .withFilePosition(IcebergAcidUtil.getDeleteFilePosition(rec))
+ .withPartitionKey(getSerializedPartitionKey(partition, partitionSpec)).build();
+ }
+ }
+
+ public static class GenericRecordBuilder<T> {
+
+ private final GenericRecord current;
+
+ public GenericRecordBuilder(Schema schema) {
+ current = GenericRecord.create(schema);
+ }
+
+ public GenericRecordBuilder<T> withSpecId(int specId) {
+ IcebergAcidUtil.setSpecId(current, specId);
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]