ggershinsky commented on a change in pull request #776: URL: https://github.com/apache/parquet-mr/pull/776#discussion_r415622844
########## File path: parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java ########## @@ -1185,70 +1275,189 @@ static long getOffset(ColumnChunk columnChunk) { return offset; } + private static void verifyFooterIntegrity(InputStream from, InternalFileDecryptor fileDecryptor, + int combinedFooterLength) throws IOException { + + byte[] nonce = new byte[AesCipher.NONCE_LENGTH]; + from.read(nonce); + byte[] gcmTag = new byte[AesCipher.GCM_TAG_LENGTH]; + from.read(gcmTag); + + AesGcmEncryptor footerSigner = fileDecryptor.createSignedFooterEncryptor(); + + byte[] footerAndSignature = ((ByteBufferInputStream) from).slice(0).array(); + int footerSignatureLength = AesCipher.NONCE_LENGTH + AesCipher.GCM_TAG_LENGTH; + byte[] serializedFooter = new byte[combinedFooterLength - footerSignatureLength]; + System.arraycopy(footerAndSignature, 0, serializedFooter, 0, serializedFooter.length); + + byte[] signedFooterAAD = AesCipher.createFooterAAD(fileDecryptor.getFileAAD()); + byte[] encryptedFooterBytes = footerSigner.encrypt(false, serializedFooter, nonce, signedFooterAAD); + byte[] calculatedTag = new byte[AesCipher.GCM_TAG_LENGTH]; + System.arraycopy(encryptedFooterBytes, encryptedFooterBytes.length - AesCipher.GCM_TAG_LENGTH, + calculatedTag, 0, AesCipher.GCM_TAG_LENGTH); + if (!Arrays.equals(gcmTag, calculatedTag)) { + throw new TagVerificationException("Signature mismatch in plaintext footer"); + } + } + public ParquetMetadata readParquetMetadata(final InputStream from, MetadataFilter filter) throws IOException { + return readParquetMetadata(from, filter, null, false, 0); + } + + public ParquetMetadata readParquetMetadata(final InputStream from, MetadataFilter filter, + final InternalFileDecryptor fileDecryptor, final boolean encryptedFooter, + final int combinedFooterLength) throws IOException { + + final BlockCipher.Decryptor footerDecryptor = (encryptedFooter? 
fileDecryptor.fetchFooterDecryptor() : null); + final byte[] encryptedFooterAAD = (encryptedFooter? AesCipher.createFooterAAD(fileDecryptor.getFileAAD()) : null); + FileMetaData fileMetaData = filter.accept(new MetadataFilterVisitor<FileMetaData, IOException>() { @Override public FileMetaData visit(NoFilter filter) throws IOException { - return readFileMetaData(from); + return readFileMetaData(from, footerDecryptor, encryptedFooterAAD); } @Override public FileMetaData visit(SkipMetadataFilter filter) throws IOException { - return readFileMetaData(from, true); + return readFileMetaData(from, true, footerDecryptor, encryptedFooterAAD); } @Override public FileMetaData visit(OffsetMetadataFilter filter) throws IOException { - return filterFileMetaDataByStart(readFileMetaData(from), filter); + return filterFileMetaDataByStart(readFileMetaData(from, footerDecryptor, encryptedFooterAAD), filter); } @Override public FileMetaData visit(RangeMetadataFilter filter) throws IOException { - return filterFileMetaDataByMidpoint(readFileMetaData(from), filter); + return filterFileMetaDataByMidpoint(readFileMetaData(from, footerDecryptor, encryptedFooterAAD), filter); } }); LOG.debug("{}", fileMetaData); - ParquetMetadata parquetMetadata = fromParquetMetadata(fileMetaData); + + if (!encryptedFooter && null != fileDecryptor) { + if (!fileMetaData.isSetEncryption_algorithm()) { // Plaintext file Review comment: Is there a requirement in the code formatting rules in this community to keep comments on separate lines? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org