umehrot2 commented on a change in pull request #2160:
URL: https://github.com/apache/hudi/pull/2160#discussion_r512379798
##########
File path:
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java
##########
@@ -155,113 +150,75 @@ private static boolean isFieldExistsInSchema(Map<String,
String> newTableSchema,
* @param parquetType : Single paruet field
* @return : Equivalent sHive schema
*/
- private static String convertField(final Type parquetType) {
+ private static String convertFieldFromAvro(final Schema schema) {
StringBuilder field = new StringBuilder();
- if (parquetType.isPrimitive()) {
- final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName =
- parquetType.asPrimitiveType().getPrimitiveTypeName();
- final OriginalType originalType = parquetType.getOriginalType();
- if (originalType == OriginalType.DECIMAL) {
- final DecimalMetadata decimalMetadata =
parquetType.asPrimitiveType().getDecimalMetadata();
- return
field.append("DECIMAL(").append(decimalMetadata.getPrecision()).append(" , ")
- .append(decimalMetadata.getScale()).append(")").toString();
- } else if (originalType == OriginalType.DATE) {
+ Schema.Type type = schema.getType();
+ LogicalType logicalType = schema.getLogicalType();
+ if (logicalType != null) {
+ if (logicalType instanceof LogicalTypes.Decimal) {
+ return field.append("DECIMAL(").append(((LogicalTypes.Decimal)
logicalType).getPrecision()).append(" , ")
+ .append(((LogicalTypes.Decimal)
logicalType).getScale()).append(")").toString();
+ } else if (logicalType instanceof LogicalTypes.Date) {
return field.append("DATE").toString();
+ } else {
+ Log.info("not handle the type transform");
}
- // TODO - fix the method naming here
- return parquetPrimitiveTypeName.convert(new
PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
- @Override
- public String convertBOOLEAN(PrimitiveType.PrimitiveTypeName
primitiveTypeName) {
- return "boolean";
- }
-
- @Override
- public String convertINT32(PrimitiveType.PrimitiveTypeName
primitiveTypeName) {
- return "int";
- }
-
- @Override
- public String convertINT64(PrimitiveType.PrimitiveTypeName
primitiveTypeName) {
- return "bigint";
- }
-
- @Override
- public String convertINT96(PrimitiveType.PrimitiveTypeName
primitiveTypeName) {
- return "timestamp-millis";
- }
-
- @Override
- public String convertFLOAT(PrimitiveType.PrimitiveTypeName
primitiveTypeName) {
- return "float";
- }
-
- @Override
- public String convertDOUBLE(PrimitiveType.PrimitiveTypeName
primitiveTypeName) {
- return "double";
- }
-
- @Override
- public String
convertFIXED_LEN_BYTE_ARRAY(PrimitiveType.PrimitiveTypeName primitiveTypeName) {
- return "binary";
- }
-
- @Override
- public String convertBINARY(PrimitiveType.PrimitiveTypeName
primitiveTypeName) {
- if (originalType == OriginalType.UTF8 || originalType ==
OriginalType.ENUM) {
- return "string";
- } else {
- return "binary";
- }
+ }
+ if (type.equals(Schema.Type.BOOLEAN)) {
+ return field.append("boolean").toString();
+ } else if (type.equals(Schema.Type.INT)) {
+ return field.append("int").toString();
+ } else if (type.equals(Schema.Type.LONG)) {
+ return field.append("bigint").toString();
+ } else if (type.equals(Schema.Type.FLOAT)) {
+ return field.append("float").toString();
+ } else if (type.equals(Schema.Type.DOUBLE)) {
+ return field.append("double").toString();
+ } else if (type.equals(Schema.Type.BYTES)) {
+ return field.append("binary").toString();
+ } else if (type.equals(Schema.Type.STRING)) {
+ return field.append("string").toString();
+ } else if (type.equals(Schema.Type.RECORD)) {
+ List<Pair<String, Schema>> noNullSchemaFields = new ArrayList<>();
+ for (Schema.Field fieldItem : schema.getFields()) {
+ if (fieldItem.schema().getType().equals(Schema.Type.NULL)) {
Review comment:
I would suggest moving this logic (filtering out the `NULL`-typed fields) into
`createHiveStructFromAvro`, since you already have a dedicated function for building
the struct. That is how the code was organized earlier. You can then simply pass the
`List<Schema.Field>` from here to `createHiveStructFromAvro` and let it do the handling.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]