deniskuzZ commented on code in PR #6074: URL: https://github.com/apache/hive/pull/6074#discussion_r2358327542
##########
iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java:
##########
@@ -329,4 +336,85 @@ public static String convertToTypeString(Type type) {
         throw new UnsupportedOperationException(type + " is not supported");
     }
   }
+
+  public static void setDefault(List<Types.NestedField> fields, Record record, Set<String> missingColumns) {
+    for (Types.NestedField field : fields) {
+      Object fieldValue = record.getField(field.name());
+
+      if (fieldValue == null) {
+        boolean isMissing = missingColumns.contains(field.name());
+
+        if (isMissing) {
+          if (field.type().isStructType()) {
+            // Create struct and apply defaults to all nested fields
+            Record nestedRecord = GenericRecord.create(field.type().asStructType());
+            record.setField(field.name(), nestedRecord);
+            // For nested fields, we consider ALL fields as "missing" to apply defaults
+            setDefaultForNestedStruct(field.type().asStructType().fields(), nestedRecord);
+          } else if (field.writeDefault() != null) {
+            Object defaultValue = convertToWriteType(field.writeDefault(), field.type());
+            record.setField(field.name(), defaultValue);
+          }
+        }
+        // Explicit NULLs remain NULL
+      } else if (field.type().isStructType() && fieldValue instanceof Record) {
+        // For existing structs, apply defaults to any null nested fields
+        setDefaultForNestedStruct(field.type().asStructType().fields(), (Record) fieldValue);
+      }
+    }
+  }
+
+  // Special method for nested structs that always applies defaults to null fields
+  private static void setDefaultForNestedStruct(List<Types.NestedField> fields, Record record) {
+    for (Types.NestedField field : fields) {
+      Object fieldValue = record.getField(field.name());
+
+      if (fieldValue == null && field.writeDefault() != null) {
+        // Always apply default to null fields in nested structs
+        Object defaultValue = convertToWriteType(field.writeDefault(), field.type());
+        record.setField(field.name(), defaultValue);
+      } else if (field.type().isStructType() && fieldValue instanceof Record) {
+        // Recursively process nested structs
+        setDefaultForNestedStruct(field.type().asStructType().fields(), (Record) fieldValue);
+      }
+    }
+  }
+
+  private static Object convertToWriteType(Object value, Type type) {
+    if (value == null) {
+      return null;
+    }
+
+    switch (type.typeId()) {
+      case DATE:
+        // Convert days since epoch (Integer) to LocalDate
+        if (value instanceof Integer) {
+          return DateTimeUtil.dateFromDays((Integer) value);
+        }
+        break;
+      case TIMESTAMP:
+        // Convert microseconds since epoch (Long) to LocalDateTime
+        if (value instanceof Long) {
+          return DateTimeUtil.timestampFromMicros((Long) value);
+        }
+        break;
+      case TIMESTAMP_NANO:

Review Comment:
   looks like we support more types here than in `IcebergInternalRecordWrapper.converter`. could we extend it and reuse?
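
To make the suggestion concrete, a rough sketch of what a shared converter could look like is below. This is only an illustration, not the PR's implementation: the class name `DefaultValueConverters` and its placement are hypothetical, and only the `DateTimeUtil` and `Types` calls shown are taken from the existing Iceberg API. Both `HiveSchemaUtil.convertToWriteType` and `IcebergInternalRecordWrapper` could then delegate to it instead of duplicating the type mapping.

   // Hypothetical shared helper; name and placement are illustrative only.
   import java.util.function.Function;

   import org.apache.iceberg.types.Type;
   import org.apache.iceberg.types.Types;
   import org.apache.iceberg.util.DateTimeUtil;

   public final class DefaultValueConverters {

     private DefaultValueConverters() {
     }

     // Returns a function that converts a value from Iceberg's internal
     // representation (days / micros since epoch) to the java.time objects
     // expected by GenericRecord, or identity when no conversion is needed.
     public static Function<Object, Object> converter(Type type) {
       switch (type.typeId()) {
         case DATE:
           // days since epoch (Integer) -> LocalDate
           return value -> value instanceof Integer ? DateTimeUtil.dateFromDays((Integer) value) : value;
         case TIME:
           // micros since midnight (Long) -> LocalTime
           return value -> value instanceof Long ? DateTimeUtil.timeFromMicros((Long) value) : value;
         case TIMESTAMP: {
           // micros since epoch (Long) -> LocalDateTime or OffsetDateTime,
           // depending on whether the type adjusts to UTC
           boolean adjustToUtc = ((Types.TimestampType) type).shouldAdjustToUTC();
           return value -> {
             if (!(value instanceof Long)) {
               return value;
             }
             long micros = (Long) value;
             return adjustToUtc
                 ? DateTimeUtil.timestamptzFromMicros(micros)
                 : DateTimeUtil.timestampFromMicros(micros);
           };
         }
         default:
           // TIMESTAMP_NANO and any other types would need their own cases
           return Function.identity();
       }
     }
   }

With something like this in place, `setDefault` could apply a default via
`record.setField(field.name(), converter(field.type()).apply(field.writeDefault()))`.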