sv2000 commented on a change in pull request #3060: URL: https://github.com/apache/incubator-gobblin/pull/3060#discussion_r455413115
########## File path: gobblin-compaction/src/main/java/org/apache/gobblin/compaction/mapreduce/orc/OrcUtils.java ########## @@ -491,18 +491,34 @@ public static WritableComparable createValueRecursively(TypeDescription schema) */ public static boolean eligibleForUpConvertHelper(TypeDescription originalSchema, TypeDescription targetSchema) { if (!targetSchema.getCategory().isPrimitive()) { - if (!originalSchema.getFieldNames().containsAll(targetSchema.getFieldNames())) { - return false; - } - boolean result = true; + if (targetSchema.getCategory().equals(TypeDescription.Category.LIST)) { + return eligibleForUpConvertHelper(originalSchema.getChildren().get(0), targetSchema.getChildren().get(0)); Review comment: Should we do a null check here? What happens if originalSchema.getChildren() returns null? ########## File path: gobblin-compaction/src/test/java/org/apache/gobblin/compaction/mapreduce/orc/OrcUtilsTest.java ########## @@ -294,6 +294,26 @@ public void testOrcStructProjection() throws Exception { Assert.assertEquals(projectColumnStruct, projectedStructExpectedValue); } + @Test + public void complextTypeEligibilityCheck() throws Exception { + TypeDescription struct_array_0 = TypeDescription.fromString("struct<first:array<int>,second:int>"); + TypeDescription struct_array_1 = TypeDescription.fromString("struct<first:array<int>,second:int>"); + Assert.assertTrue(OrcUtils.eligibleForUpConvert(struct_array_0, struct_array_1)); + TypeDescription struct_array_2 = TypeDescription.fromString("struct<first:array<string>,second:int>"); + Assert.assertFalse(OrcUtils.eligibleForUpConvert(struct_array_0, struct_array_2)); + + TypeDescription struct_map_0 = TypeDescription.fromString("struct<first:map<string,string>,second:int>"); + TypeDescription struct_map_1 = TypeDescription.fromString("struct<first:map<string,string>,second:int>"); + TypeDescription struct_map_2 = TypeDescription.fromString("struct<first:map<string,int>,second:int>"); + 
Assert.assertTrue(OrcUtils.eligibleForUpConvert(struct_map_0, struct_map_1)); + Assert.assertFalse(OrcUtils.eligibleForUpConvert(struct_map_0, struct_map_2)); + + // A complicated schema + TypeDescription struct_a = TypeDescription.fromString("struct<stone:struct<memberId:int,viewerUrn:string,aUrn:string,csUserUrn:string,time:bigint,server:string,service:string,environment:string,guid:binary,treeId:binary,requestId:int,impersonatorId:string,version:string,instance:string,appName:string,testId:string,testSegmentId:string,auditstone:struct<time:bigint,server:string,instance:string,appName:string,messageId:binary,auditVersion:int,fabricUrn:string,clusterConnectionString:string>,pageInstance:struct<pageUrn:string,trackingId:binary>,clientApplicationInstance:struct<applicationUrn:string,version:string,trackingId:binary>,originSource:string,sessionUrn:string,traceData:struct<treeId:binary,requestId:int,taskId:int,rpcTrace:string,forceTraceEnabled:boolean,context:map<string,string>,scaleFactor:double>>,requeststone:struct<browserId:string,sessionId:string,ip:string,pageKey:string,path:string,locale:string,interfaceLocale:string,trackingCode:string,referer:string,userAgent:string,ipAsBytes:binary,requestProtocol:string,requestDomain:string>,statusCode:int,bodyDataAnnotationGroups:array<struct<annotations:array<struct<domainType:struct<entityId:int,attributeId:int>,sourceUrn:string>>,groupId:int,childrenGroupIds:array<int>,hashedSchemaName:int>>,resourceName:string,info:struct<rm:string,actionOrFinderName:string,schemaName:string>,xId:struct<tenantUrn:string,eUrn:string,memberUrn:string,cloudUrn:string,clientId:string,userMetadata:struct<emad:string,accountAgeInSeconds:bigint>>>"); Review comment: Two points: 1. Maybe avoid exposing LinkedIn internal schemas outside, and 2. From the schema definitions, it is hard to see what exactly is being tested. 
Can you add a comment explaining the difference between struct_a and struct_b and why eligibleForUpConvert should return true? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org