[
https://issues.apache.org/jira/browse/PARQUET-1229?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17093195#comment-17093195
]
ASF GitHub Bot commented on PARQUET-1229:
-----------------------------------------
ggershinsky commented on a change in pull request #776:
URL: https://github.com/apache/parquet-mr/pull/776#discussion_r415622844
##########
File path:
parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
##########
@@ -1185,70 +1275,189 @@ static long getOffset(ColumnChunk columnChunk) {
return offset;
}
+ private static void verifyFooterIntegrity(InputStream from,
InternalFileDecryptor fileDecryptor,
+ int combinedFooterLength) throws IOException {
+
+ byte[] nonce = new byte[AesCipher.NONCE_LENGTH];
+ from.read(nonce);
+ byte[] gcmTag = new byte[AesCipher.GCM_TAG_LENGTH];
+ from.read(gcmTag);
+
+ AesGcmEncryptor footerSigner =
fileDecryptor.createSignedFooterEncryptor();
+
+ byte[] footerAndSignature = ((ByteBufferInputStream)
from).slice(0).array();
+ int footerSignatureLength = AesCipher.NONCE_LENGTH +
AesCipher.GCM_TAG_LENGTH;
+ byte[] serializedFooter = new byte[combinedFooterLength -
footerSignatureLength];
+ System.arraycopy(footerAndSignature, 0, serializedFooter, 0,
serializedFooter.length);
+
+ byte[] signedFooterAAD =
AesCipher.createFooterAAD(fileDecryptor.getFileAAD());
+ byte[] encryptedFooterBytes = footerSigner.encrypt(false,
serializedFooter, nonce, signedFooterAAD);
+ byte[] calculatedTag = new byte[AesCipher.GCM_TAG_LENGTH];
+ System.arraycopy(encryptedFooterBytes, encryptedFooterBytes.length -
AesCipher.GCM_TAG_LENGTH,
+ calculatedTag, 0, AesCipher.GCM_TAG_LENGTH);
+ if (!Arrays.equals(gcmTag, calculatedTag)) {
+ throw new TagVerificationException("Signature mismatch in plaintext
footer");
+ }
+ }
+
public ParquetMetadata readParquetMetadata(final InputStream from,
MetadataFilter filter) throws IOException {
+ return readParquetMetadata(from, filter, null, false, 0);
+ }
+
+ public ParquetMetadata readParquetMetadata(final InputStream from,
MetadataFilter filter,
+ final InternalFileDecryptor fileDecryptor, final boolean
encryptedFooter,
+ final int combinedFooterLength) throws IOException {
+
+ final BlockCipher.Decryptor footerDecryptor = (encryptedFooter?
fileDecryptor.fetchFooterDecryptor() : null);
+ final byte[] encryptedFooterAAD = (encryptedFooter?
AesCipher.createFooterAAD(fileDecryptor.getFileAAD()) : null);
+
FileMetaData fileMetaData = filter.accept(new
MetadataFilterVisitor<FileMetaData, IOException>() {
@Override
public FileMetaData visit(NoFilter filter) throws IOException {
- return readFileMetaData(from);
+ return readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
}
@Override
public FileMetaData visit(SkipMetadataFilter filter) throws IOException {
- return readFileMetaData(from, true);
+ return readFileMetaData(from, true, footerDecryptor,
encryptedFooterAAD);
}
@Override
public FileMetaData visit(OffsetMetadataFilter filter) throws
IOException {
- return filterFileMetaDataByStart(readFileMetaData(from), filter);
+ return filterFileMetaDataByStart(readFileMetaData(from,
footerDecryptor, encryptedFooterAAD), filter);
}
@Override
public FileMetaData visit(RangeMetadataFilter filter) throws IOException
{
- return filterFileMetaDataByMidpoint(readFileMetaData(from), filter);
+ return filterFileMetaDataByMidpoint(readFileMetaData(from,
footerDecryptor, encryptedFooterAAD), filter);
}
});
LOG.debug("{}", fileMetaData);
- ParquetMetadata parquetMetadata = fromParquetMetadata(fileMetaData);
+
+ if (!encryptedFooter && null != fileDecryptor) {
+ if (!fileMetaData.isSetEncryption_algorithm()) { // Plaintext file
Review comment:
Is there a requirement in this community's code formatting rules to
keep comments on separate lines?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> parquet-mr code changes for encryption support
> ----------------------------------------------
>
> Key: PARQUET-1229
> URL: https://issues.apache.org/jira/browse/PARQUET-1229
> Project: Parquet
> Issue Type: Sub-task
> Components: parquet-mr
> Reporter: Gidon Gershinsky
> Assignee: Gidon Gershinsky
> Priority: Major
> Labels: pull-request-available
>
> Addition of encryption/decryption support to the existing Parquet classes and
> APIs
--
This message was sent by Atlassian Jira
(v8.3.4#803005)