This is an automated email from the ASF dual-hosted git repository. sivabalan pushed a commit to branch release-0.5.3 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 0050e00afdc728793f9928cbb594e657c7219c41 Author: Udit Mehrotra <[email protected]> AuthorDate: Sun Mar 1 10:42:58 2020 -0800 [HUDI-607] Fix to allow creation/syncing of Hive tables partitioned by Date type columns (#1330) --- .../main/java/org/apache/hudi/DataSourceUtils.java | 40 +++++++++++++- hudi-spark/src/test/java/DataSourceUtilsTest.java | 61 ++++++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java index 475a925..99a795d 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -18,6 +18,8 @@ package org.apache.hudi; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; import org.apache.hudi.client.HoodieReadClient; import org.apache.hudi.client.HoodieWriteClient; import org.apache.hudi.client.WriteStatus; @@ -43,6 +45,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import java.io.IOException; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -78,7 +81,8 @@ public class DataSourceUtils { // return, if last part of name if (i == parts.length - 1) { - return val; + Schema fieldSchema = valueNode.getSchema().getField(part).schema(); + return convertValueForSpecificDataTypes(fieldSchema, val); } else { // VC: Need a test here if (!(val instanceof GenericRecord)) { @@ -98,6 +102,40 @@ public class DataSourceUtils { } /** + * This method converts values for fields with certain Avro/Parquet data types that require special handling. + * + * Logical Date Type is converted to actual Date value instead of Epoch Integer which is how it is + * represented/stored in parquet. + * + * @param fieldSchema avro field schema + * @param fieldValue avro field value + * @return field value either converted (for certain data types) or as it is. + */ + private static Object convertValueForSpecificDataTypes(Schema fieldSchema, Object fieldValue) { + if (fieldSchema == null) { + return fieldValue; + } + + if (isLogicalTypeDate(fieldSchema)) { + return LocalDate.ofEpochDay(Long.parseLong(fieldValue.toString())); + } + return fieldValue; + } + + /** + * Given an Avro field schema checks whether the field is of Logical Date Type or not. + * + * @param fieldSchema avro field schema + * @return boolean indicating whether fieldSchema is of Avro's Date Logical Type + */ + private static boolean isLogicalTypeDate(Schema fieldSchema) { + if (fieldSchema.getType() == Schema.Type.UNION) { + return fieldSchema.getTypes().stream().anyMatch(schema -> schema.getLogicalType() == LogicalTypes.date()); + } + return fieldSchema.getLogicalType() == LogicalTypes.date(); + } + + /** * Create a key generator class via reflection, passing in any configs needed. * <p> * If the class name of key generator is configured through the properties file, i.e., {@code props}, use the diff --git a/hudi-spark/src/test/java/DataSourceUtilsTest.java b/hudi-spark/src/test/java/DataSourceUtilsTest.java new file mode 100644 index 0000000..4fe7547 --- /dev/null +++ b/hudi-spark/src/test/java/DataSourceUtilsTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.DataSourceUtils; +import org.junit.Test; + +import java.time.LocalDate; + +import static org.junit.Assert.assertEquals; + +public class DataSourceUtilsTest { + + @Test + public void testAvroRecordsFieldConversion() { + // There are fields event_date1, event_date2, event_date3 with logical type as Date. event_date1 & event_date3 are + // of UNION schema type, which is a union of null and date type in different orders. event_date2 is non-union + // date type + String avroSchemaString = "{\"type\": \"record\"," + "\"name\": \"events\"," + "\"fields\": [ " + + "{\"name\": \"event_date1\", \"type\" : [{\"type\" : \"int\", \"logicalType\" : \"date\"}, \"null\"]}," + + "{\"name\": \"event_date2\", \"type\" : {\"type\": \"int\", \"logicalType\" : \"date\"}}," + + "{\"name\": \"event_date3\", \"type\" : [\"null\", {\"type\" : \"int\", \"logicalType\" : \"date\"}]}," + + "{\"name\": \"event_name\", \"type\": \"string\"}," + + "{\"name\": \"event_organizer\", \"type\": \"string\"}" + + "]}"; + + Schema avroSchema = new Schema.Parser().parse(avroSchemaString); + GenericRecord record = new GenericData.Record(avroSchema); + record.put("event_date1", 18000); + record.put("event_date2", 18001); + record.put("event_date3", 18002); + record.put("event_name", "Hudi Meetup"); + record.put("event_organizer", "Hudi PMC"); + + assertEquals(LocalDate.ofEpochDay(18000).toString(), DataSourceUtils.getNestedFieldValAsString(record, "event_date1", + true)); + assertEquals(LocalDate.ofEpochDay(18001).toString(), DataSourceUtils.getNestedFieldValAsString(record, "event_date2", + true)); + assertEquals(LocalDate.ofEpochDay(18002).toString(), DataSourceUtils.getNestedFieldValAsString(record, "event_date3", + true)); + assertEquals("Hudi Meetup", DataSourceUtils.getNestedFieldValAsString(record, "event_name", true)); + assertEquals("Hudi PMC", DataSourceUtils.getNestedFieldValAsString(record, "event_organizer", true)); + } +}
