[
https://issues.apache.org/jira/browse/GOBBLIN-1250?focusedWorklogId=480487&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-480487
]
ASF GitHub Bot logged work on GOBBLIN-1250:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 08/Sep/20 23:41
Start Date: 08/Sep/20 23:41
Worklog Time Spent: 10m
Work Description: autumnust commented on a change in pull request #3090:
URL: https://github.com/apache/incubator-gobblin/pull/3090#discussion_r485253677
##########
File path:
gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/AvroOrcSchemaConverterTest.java
##########
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.writer;
+
+import java.util.List;
+
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.apache.orc.TypeDescription;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import com.google.common.base.Preconditions;
+
+import static
org.apache.gobblin.writer.AvroOrcSchemaConverter.sanitizeNullableSchema;
+
+
+public class AvroOrcSchemaConverterTest {
+ @Test
+ public void testUnionORCSchemaTranslation() throws Exception {
+ Schema avroUnion = SchemaBuilder.record("test")
+ .fields()
+ .name("test_union")
+
.type(SchemaBuilder.builder().unionOf().stringType().and().intType().and().nullType().endUnion())
+ .noDefault()
+ .endRecord();
+
+ TypeDescription unionSchema = TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createString())
+ .addUnionChild(TypeDescription.createInt());
+ TypeDescription recordSchemaWithUnion =
TypeDescription.createStruct().addField("test_union", unionSchema);
+
+ // Verify the schema conversion for Union works
+ Assert.assertEquals(AvroOrcSchemaConverter.getOrcSchema(avroUnion),
recordSchemaWithUnion);
+
+ //Create a nullable union field
+ Schema nullableAvroUnion = SchemaBuilder.record("test")
+ .fields()
+ .name("test_union")
+
.type(SchemaBuilder.builder().unionOf().stringType().and().nullType().endUnion())
+ .noDefault()
+ .endRecord();
+ //Assert that Orc schema has flattened the nullable union to the member's type
+ Assert.assertEquals(AvroOrcSchemaConverter.getOrcSchema(nullableAvroUnion),
+ TypeDescription.createStruct().addField("test_union",
TypeDescription.createString()));
+
+ //Create a non nullable union type
+ Schema nonNullableAvroUnion = SchemaBuilder.record("test")
+ .fields()
+ .name("test_union")
+ .type(SchemaBuilder.builder().unionOf().stringType().endUnion())
+ .noDefault()
+ .endRecord();
+ //Ensure that the union type is preserved
+
Assert.assertEquals(AvroOrcSchemaConverter.getOrcSchema(nonNullableAvroUnion),
TypeDescription.createStruct()
+ .addField("test_union",
TypeDescription.createUnion().addUnionChild(TypeDescription.createString())));
+ }
+
+ @Test
+ public void testTrivialAvroSchemaTranslation() throws Exception {
+
+ // Trivial cases
+ Schema avroSchema = SchemaBuilder.record("test")
+ .fields()
+ .name("string_type")
+ .type(SchemaBuilder.builder().stringType())
+ .noDefault()
+ .name("int_type")
+ .type(SchemaBuilder.builder().intType())
+ .noDefault()
+ .endRecord();
+
+ TypeDescription orcSchema = TypeDescription.createStruct()
+ .addField("string_type", TypeDescription.createString())
+ .addField("int_type", TypeDescription.createInt());
+
+ // Top-level record name will not be replicated in conversion result.
+ Assert.assertEquals(avroSchema.getFields(),
getAvroSchema(orcSchema).getFields());
+ }
+
+ @Test
+ public void testUnionAvroSchemaTranslation() throws Exception {
+ Schema avroSchema = SchemaBuilder.record("test")
+ .fields()
+ .name("union_nested")
+
.type(SchemaBuilder.builder().unionOf().stringType().and().intType().endUnion())
+ .noDefault()
+ .endRecord();
+ TypeDescription orcSchema = TypeDescription.createStruct()
+ .addField("union_nested", TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createString())
+ .addUnionChild(TypeDescription.createInt()));
+
+ Assert.assertEquals(avroSchema.getFields(),
getAvroSchema(orcSchema).getFields());
+ }
+
+ @Test
+ public void testSchemaSanitization() throws Exception {
+
+ // Two fields along with null
+ Schema avroSchema =
SchemaBuilder.builder().unionOf().nullType().and().stringType().and().intType().endUnion();
+ Schema expectedSchema =
SchemaBuilder.builder().unionOf().stringType().and().intType().endUnion();
+ Assert.assertEquals(sanitizeNullableSchema(avroSchema), expectedSchema);
+
+ // Only one field except null
+ Schema avroSchema_1 = SchemaBuilder.builder()
+ .unionOf()
+ .nullType()
+ .and()
+ .record("test")
+ .fields()
+ .name("aaa")
+ .type(SchemaBuilder.builder().intType())
+ .noDefault()
+ .endRecord()
+ .endUnion();
+ expectedSchema = SchemaBuilder.builder()
+ .record("test")
+ .fields()
+ .name("aaa")
+ .type(SchemaBuilder.builder().intType())
+ .noDefault()
+ .endRecord();
+ Assert.assertEquals(sanitizeNullableSchema(avroSchema_1), expectedSchema);
+ }
+
+ public static Schema getAvroSchema(TypeDescription schema) {
Review comment:
The same argument as the first one ...
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 480487)
Time Spent: 1h 10m (was: 1h)
> ORC Writer
> -----------
>
> Key: GOBBLIN-1250
> URL: https://issues.apache.org/jira/browse/GOBBLIN-1250
> Project: Apache Gobblin
> Issue Type: New Feature
> Reporter: Lei Sun
> Priority: Major
> Time Spent: 1h 10m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)