[ https://issues.apache.org/jira/browse/ORC-54?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15364829#comment-15364829 ]
ASF GitHub Bot commented on ORC-54: ----------------------------------- Github user omalley commented on a diff in the pull request: https://github.com/apache/orc/pull/40#discussion_r69785648 --- Diff: java/core/src/java/org/apache/orc/impl/SchemaEvolution.java --- @@ -85,55 +142,78 @@ void buildMapping(TypeDescription fileType, // check the easy case first if (fileType.getCategory() == readerType.getCategory()) { switch (readerType.getCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DOUBLE: - case FLOAT: - case STRING: - case TIMESTAMP: - case BINARY: - case DATE: - // these are always a match - break; - case CHAR: - case VARCHAR: - // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL) - isOk = fileType.getMaxLength() == readerType.getMaxLength(); - break; - case DECIMAL: - // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL) - // TODO we don't enforce scale and precision checks, but probably should - break; - case UNION: - case MAP: - case LIST: { - // these must be an exact match - List<TypeDescription> fileChildren = fileType.getChildren(); - List<TypeDescription> readerChildren = readerType.getChildren(); - if (fileChildren.size() == readerChildren.size()) { - for(int i=0; i < fileChildren.size(); ++i) { - buildMapping(fileChildren.get(i), readerChildren.get(i)); - } - } else { - isOk = false; + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case DOUBLE: + case FLOAT: + case STRING: + case TIMESTAMP: + case BINARY: + case DATE: + // these are always a match + break; + case CHAR: + case VARCHAR: + // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL) + isOk = fileType.getMaxLength() == readerType.getMaxLength(); + break; + case DECIMAL: + // HIVE-13648: Look at ORC data type conversion edge cases (CHAR, VARCHAR, DECIMAL) + // TODO we don't enforce scale and precision checks, but probably should + break; + case 
UNION: + case MAP: + case LIST: { + // these must be an exact match + List<TypeDescription> fileChildren = fileType.getChildren(); + List<TypeDescription> readerChildren = readerType.getChildren(); + if (fileChildren.size() == readerChildren.size()) { + for (int i = 0; i < fileChildren.size(); ++i) { + buildMapping(fileChildren.get(i), readerChildren.get(i), useFieldNames); } - break; + } else { + isOk = false; } - case STRUCT: { - // allow either side to have fewer fields than the other - List<TypeDescription> fileChildren = fileType.getChildren(); - List<TypeDescription> readerChildren = readerType.getChildren(); + break; + } + case STRUCT: { + List<TypeDescription> readerChildren = readerType.getChildren(); + List<String> readerFieldNames = readerType.getFieldNames(); + + List<String> fileFieldNames = fileType.getFieldNames(); + List<TypeDescription> fileChildren = + fileType.getChildren(); + + if (useFieldNames) { + Map<String, TypeDescription> fileTypesIdx = new HashMap<>(); + for (int i = 0; i < fileFieldNames.size(); i++) { + fileTypesIdx.put(fileFieldNames.get(i), fileChildren.get(i)); + } + + for (int i = 0; i < readerFieldNames.size(); i++) { + String readerFieldName = readerFieldNames.get(i); + TypeDescription readerField = readerChildren.get(i); + + TypeDescription fileField = fileTypesIdx.get(readerFieldName); + if (fileField == null) { + continue; + } + + buildMapping(fileField, readerField, true); + } + } else { int jointSize = Math.min(fileChildren.size(), readerChildren.size()); - for(int i=0; i < jointSize; ++i) { - buildMapping(fileChildren.get(i), readerChildren.get(i)); + for (int i = 0; i < jointSize; ++i) { + buildMapping(fileChildren.get(i), readerChildren.get(i), false); --- End diff -- The last argument here should also be true (not false), so that field names are matched below the root as well. 
> Evolve schemas based on field name rather than index > ---------------------------------------------------- > > Key: ORC-54 > URL: https://issues.apache.org/jira/browse/ORC-54 > Project: Orc > Issue Type: Improvement > Reporter: Mark Wagner > Assignee: Mark Wagner > > Schema evolution as it stands today allows adding fields to the end of > schemas or removing them from the end. However, because it is based on the > index of the column, you can only ever add or remove -- not both. > ORC files have the full schema information of their contents, so there's > actually enough metadata to support changing columns anywhere in the schema. -- This message was sent by Atlassian JIRA (v6.3.4#6332)