jackye1995 commented on a change in pull request #3887:
URL: https://github.com/apache/iceberg/pull/3887#discussion_r784278242
##########
File path:
aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java
##########
@@ -252,59 +255,90 @@ private static String toTypeString(Type type) {
private static List<Column> toColumns(TableMetadata metadata) {
List<Column> columns = Lists.newArrayList();
- Set<NestedField> rootColumnSet = Sets.newHashSet();
+ Set<String> addedNames = Sets.newHashSet();
// Add schema-column fields
for (NestedField field : metadata.schema().columns()) {
- rootColumnSet.add(field);
- columns.add(Column.builder()
- .name(field.name())
- .type(toTypeString(field.type()))
- .comment(field.doc())
- .parameters(convertToParameters(SCHEMA_COLUMN, field))
- .build());
+ addColumnWithDedupe(columns, addedNames, field, field.name(),
SCHEMA_COLUMN);
}
// Add schema-subfield
- for (NestedField field :
TypeUtil.indexById(metadata.schema().asStruct()).values()) {
- if (!rootColumnSet.contains(field)) {
- columns.add(Column.builder()
- .name(field.name())
- .type(toTypeString(field.type()))
- .comment(field.doc())
- .parameters(convertToParameters(SCHEMA_SUBFIELD, field))
- .build());
+ for (String fieldName :
TypeUtil.indexNameById(metadata.schema().asStruct()).values()) {
+ NestedField field = metadata.schema().findField(fieldName);
+ if (field != null) {
+ addColumnWithDedupe(columns, addedNames, field, fieldName,
SCHEMA_SUBFIELD);
}
}
// Add partition-field
for (PartitionField partitionField : metadata.spec().fields()) {
+ addPartitionColumnWithDedupe(columns, addedNames, partitionField,
metadata.spec().schema());
+ }
+ return columns;
+ }
+
+ private static void addColumnWithDedupe(List<Column> columns, Set<String>
dedupe,
+ NestedField field, String fieldName,
String usage) {
+ if (!dedupe.contains(fieldName)) {
+ columns.add(Column.builder()
+ .name(fieldName)
+ .type(toTypeString(field.type()))
+ .comment(field.doc())
+ .parameters(convertToParameters(usage, field))
+ .build());
+ dedupe.add(fieldName);
+ }
+ }
+
+ private static void addPartitionColumnWithDedupe(List<Column> columns,
Set<String> dedupe,
+ PartitionField
partitionField, Schema schema) {
+ String typeId = null;
+ String typeString = null;
+ try {
Type type = partitionField.transform()
-
.getResultType(metadata.schema().findField(partitionField.sourceId()).type());
+ .getResultType(schema.findField(partitionField.sourceId()).type());
+ typeId = type.typeId().name();
+ typeString = toTypeString(type);
+ } catch (RuntimeException e) {
+ LOG.error("Failed to convert partition field to column", e);
+ }
+
+ // avoid identity partition field name collide with schema columns
+ String fieldName = partitionField.name();
+ if (partitionField.transform().isIdentity()) {
+ fieldName += "_identity";
+ }
+
+ if (!dedupe.contains(fieldName)) {
columns.add(Column.builder()
- .name(partitionField.name())
- .type(toTypeString(type))
- .parameters(convertToPartitionFieldParameters(type, partitionField))
+ .name(fieldName)
+ .type(typeString)
+ .parameters(convertToPartitionFieldParameters(typeId, typeString,
partitionField))
.build());
+ dedupe.add(fieldName);
}
- return columns;
}
private static Map<String, String> convertToParameters(String fieldUsage,
NestedField field) {
return ImmutableMap.of(ICEBERG_FIELD_USAGE, fieldUsage,
- ICEBERG_FIELD_TYPE_TYPE_ID, field.type().typeId().toString(),
- ICEBERG_FIELD_TYPE_STRING, toTypeString(field.type()),
+ ICEBERG_FIELD_TYPE_TYPE_ID,
safeString(field.type().typeId().toString()),
Review comment:
this cannot avoid a null because you are already calling toString()
##########
File path:
aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java
##########
@@ -44,6 +46,7 @@
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.glue.model.Column;
import software.amazon.awssdk.services.glue.model.DatabaseInput;
+import software.amazon.awssdk.services.glue.model.Schedule;
Review comment:
unused dependency
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]