Github user vvysotskyi commented on a diff in the pull request: https://github.com/apache/drill/pull/805#discussion_r133927602 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java --- @@ -548,23 +567,57 @@ public void populatePruningVector(ValueVector v, int index, SchemaPath column, S NullableVarCharVector varCharVector = (NullableVarCharVector) v; Object s = partitionValueMap.get(f).get(column); byte[] bytes; - if (s instanceof String) { // if the metadata was read from a JSON cache file it maybe a string type - bytes = ((String) s).getBytes(); - } else if (s instanceof Binary) { - bytes = ((Binary) s).getBytes(); - } else if (s instanceof byte[]) { - bytes = (byte[]) s; + if (s == null) { + varCharVector.getMutator().setNull(index); + return; } else { - throw new UnsupportedOperationException("Unable to create column data for type: " + type); + bytes = getBytes(type, s); } varCharVector.getMutator().setSafe(index, bytes, 0, bytes.length); return; } + case INTERVAL: { + NullableIntervalVector intervalVector = (NullableIntervalVector) v; + Object s = partitionValueMap.get(f).get(column); + byte[] bytes; + if (s == null) { + intervalVector.getMutator().setNull(index); + return; + } else { + bytes = getBytes(type, s); + } + intervalVector.getMutator().setSafe(index, 1, + ParquetReaderUtility.getIntFromLEBytes(bytes, 0), + ParquetReaderUtility.getIntFromLEBytes(bytes, 4), + ParquetReaderUtility.getIntFromLEBytes(bytes, 8)); + return; + } default: throw new UnsupportedOperationException("Unsupported type: " + type); } } + /** + * Returns the sequence of bytes received from {@code Object source}. 
+ * + * @param type the column type + * @param source the source of the bytes sequence + * @return bytes sequence obtained from {@code Object source} + */ + private byte[] getBytes(MinorType type, Object source) { + byte[] bytes; + if (source instanceof String) { // if the metadata was read from a JSON cache file it maybe a string type + bytes = Base64.decodeBase64(((String) source).getBytes()); --- End diff -- > Note, however, that the casting is unfortunate. The caller knows the type. Better to have: The caller does not know the type, since `source` is retrieved from a map of objects, so the cast has to be used there. > Seems we'd also need a to/from Base64 method so that, on read, we do: The value vector mutator interface does not have a method that accepts bytes. Such methods are implemented in the concrete mutator implementations, so we cannot create such a to/from Base64 method. > Strings need not be Base64 encoded They should be Base64 encoded, since the byte array may have an encoding that differs from UTF-8. I moved this conversion from the string to a byte array into the `ParquetReaderUtility.correctBinaryInMetadataCache()` method, so now we take the metadata version into account and choose how to encode the string accordingly.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---