This is an automated email from the ASF dual-hosted git repository.

adamreeve pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 336fdb35868 GH-47435: [Python][Parquet] Add direct key encryption/decryption API (#49667)
336fdb35868 is described below

commit 336fdb35868af7ce46001b73703dd3f7f2e39b8d
Author: Sreesh Maheshwar <[email protected]>
AuthorDate: Sun May 24 23:25:58 2026 +0100

    GH-47435: [Python][Parquet] Add direct key encryption/decryption API (#49667)
    
    ### Rationale for this change
    
    See https://github.com/apache/arrow/issues/47435.
    
    ### What changes are included in this PR?
    
    Adds direct encryption / decryption Python API
    
    ### Are these changes tested?
    
    Yes, see PR.
    
    ### Are there any user-facing changes?
    
    Yes, new Python bindings.
    * GitHub Issue: #47435
    
    Authored-by: Sreesh Maheshwar <[email protected]>
    Signed-off-by: Adam Reeve <[email protected]>
---
 docs/source/python/api/formats.rst              |   2 +
 python/pyarrow/_parquet_encryption.pxd          |   5 +
 python/pyarrow/_parquet_encryption.pyx          | 209 ++++++++++++++++++++++
 python/pyarrow/includes/libparquet.pxd          |  25 ++-
 python/pyarrow/parquet/encryption.py            |   4 +-
 python/pyarrow/tests/parquet/test_encryption.py | 226 ++++++++++++++++++++++++
 6 files changed, 469 insertions(+), 2 deletions(-)

diff --git a/docs/source/python/api/formats.rst b/docs/source/python/api/formats.rst
index a4f02084c4a..57a5e824fab 100644
--- a/docs/source/python/api/formats.rst
+++ b/docs/source/python/api/formats.rst
@@ -119,6 +119,8 @@ Encrypted Parquet Files
    KmsConnectionConfig
    EncryptionConfiguration
    DecryptionConfiguration
+   create_encryption_properties
+   create_decryption_properties
 
 .. _api.orc:
 
diff --git a/python/pyarrow/_parquet_encryption.pxd b/python/pyarrow/_parquet_encryption.pxd
index 48939fe277f..1a12a6d6785 100644
--- a/python/pyarrow/_parquet_encryption.pxd
+++ b/python/pyarrow/_parquet_encryption.pxd
@@ -20,6 +20,11 @@
 
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libparquet_encryption cimport *
+from pyarrow.includes.libparquet cimport (
+    CSecureString,
+    CFileDecryptionPropertiesBuilder,
+    CFileEncryptionPropertiesBuilder,
+)
 from pyarrow._parquet cimport (ParquetCipher,
                                CFileEncryptionProperties,
                                CFileDecryptionProperties,
diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx
index db6a6b56ac4..7fe7fa7491d 100644
--- a/python/pyarrow/_parquet_encryption.pyx
+++ b/python/pyarrow/_parquet_encryption.pyx
@@ -711,3 +711,212 @@ cdef shared_ptr[CDecryptionConfiguration] pyarrow_unwrap_decryptionconfig(object
     if isinstance(decryptionconfig, DecryptionConfiguration):
         return (<DecryptionConfiguration> decryptionconfig).unwrap()
     raise TypeError("Expected DecryptionConfiguration, got %s" % type(decryptionconfig))
+
+
+def create_decryption_properties(
+    footer_key,
+    *,
+    aad_prefix=None,
+    bint check_footer_integrity=True,
+    bint allow_plaintext_files=False,
+):
+    """
+    Create FileDecryptionProperties using a direct footer key.
+
+    This is a low-level API that constructs decryption properties directly
+    from a plaintext key, bypassing the KMS-based :class:`CryptoFactory`.
+    It is intended for callers that manage key wrapping and storage
+    themselves (e.g. an application-level scheme).
+
+    For most use cases, prefer the higher-level :class:`CryptoFactory`
+    with :class:`DecryptionConfiguration`, which implements the full
+    Parquet key management specification and is interoperable with
+    other tools and frameworks.
+
+    .. note::
+       Currently only uniform encryption (single key for footer and all
+       columns) is supported with this method. Per-column keys are not
+       yet available; files encrypted with per-column keys cannot be
+       decrypted using this function.
+
+    Parameters
+    ----------
+    footer_key : bytes
+        The decryption key for the file footer and all columns (uniform
+        encryption). Must be 16, 24, or 32 bytes for AES-128, AES-192,
+        or AES-256 respectively.
+    aad_prefix : bytes, optional
+        Additional Authenticated Data prefix. Must match the AAD prefix
+        that was used during encryption. Required if the AAD prefix was
+        not stored in the file metadata during encryption.
+    check_footer_integrity : bool, default True
+        Whether to verify footer integrity using the signature stored
+        in the file. Set to False only for debugging.
+    allow_plaintext_files : bool, default False
+        Whether to allow reading plaintext (unencrypted) files with
+        these decryption properties without raising an error.
+
+    Returns
+    -------
+    FileDecryptionProperties
+        Properties that can be passed to :func:`~pyarrow.parquet.read_table`,
+        :class:`~pyarrow.parquet.ParquetFile`, or
+        :class:`~pyarrow.dataset.ParquetFragmentScanOptions`.
+
+    Examples
+    --------
+    >>> import pyarrow.parquet as pq
+    >>> import pyarrow.parquet.encryption as pe
+    >>> props = pe.create_decryption_properties(
+    ...     footer_key=b'0123456789abcdef',
+    ...     aad_prefix=b'table_id',
+    ... )
+    >>> table = pq.read_table('encrypted.parquet', decryption_properties=props)
+    """
+    cdef:
+        c_string c_footer_key_str
+        CSecureString c_footer_key
+        CFileDecryptionPropertiesBuilder builder
+        shared_ptr[CFileDecryptionProperties] props
+
+    if not isinstance(footer_key, bytes):
+        raise TypeError(
+            f"footer_key must be bytes, not {type(footer_key).__name__}"
+        )
+    if len(footer_key) not in (16, 24, 32):
+        raise ValueError(
+            f"footer_key must be 16, 24, or 32 bytes, got {len(footer_key)}"
+        )
+
+    c_footer_key_str = <c_string>footer_key
+    c_footer_key = CSecureString(move(c_footer_key_str))
+    builder.footer_key(c_footer_key)
+
+    if aad_prefix is not None:
+        if not isinstance(aad_prefix, bytes):
+            raise TypeError(
+                f"aad_prefix must be bytes, not {type(aad_prefix).__name__}"
+            )
+        builder.aad_prefix(<c_string>aad_prefix)
+
+    if not check_footer_integrity:
+        builder.disable_footer_signature_verification()
+
+    if allow_plaintext_files:
+        builder.plaintext_files_allowed()
+
+    props = builder.build()
+
+    return FileDecryptionProperties.wrap(props)
+
+
+def create_encryption_properties(
+    footer_key,
+    *,
+    aad_prefix=None,
+    bint store_aad_prefix=True,
+    encryption_algorithm="AES_GCM_V1",
+    bint plaintext_footer=False,
+):
+    """
+    Create FileEncryptionProperties using a direct footer key.
+
+    This is a low-level API that constructs encryption properties directly
+    from a plaintext key, bypassing the KMS-based :class:`CryptoFactory`.
+    It is intended for callers that manage key wrapping and storage
+    themselves (e.g. an application-level scheme).
+
+    .. warning::
+       The caller is responsible for key management best practices.
+       Reusing the same key for multiple files without unique data keys
+       weakens AES-GCM security. The higher-level :class:`CryptoFactory`
+       with :class:`EncryptionConfiguration` handles this automatically
+       and is interoperable with other tools and frameworks --
+       prefer it unless you have a specific reason to manage
+       keys yourself.
+
+    .. note::
+       Currently only uniform encryption (single key for footer and all
+       columns) is supported with this method. Per-column keys are not
+       yet available; the provided key encrypts both the footer and
+       every column.
+
+    Parameters
+    ----------
+    footer_key : bytes
+        The encryption key for the file footer and all columns (uniform
+        encryption). Must be 16, 24, or 32 bytes for AES-128, AES-192,
+        or AES-256 respectively.
+    aad_prefix : bytes, optional
+        Additional Authenticated Data prefix for cryptographic binding.
+    store_aad_prefix : bool, default True
+        Whether to store the AAD prefix in the Parquet file metadata.
+        Set to False when the AAD prefix will be supplied externally
+        at read time.
+        Only meaningful when *aad_prefix* is provided.
+    encryption_algorithm : str, default "AES_GCM_V1"
+        Encryption algorithm. Either ``"AES_GCM_V1"`` or
+        ``"AES_GCM_CTR_V1"``.
+    plaintext_footer : bool, default False
+        Whether to leave the file footer unencrypted. When True, file
+        schema and metadata are readable without a key.
+
+    Returns
+    -------
+    FileEncryptionProperties
+        Properties that can be passed to :func:`~pyarrow.parquet.write_table` or
+        :class:`~pyarrow.parquet.ParquetWriter`.
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.parquet as pq
+    >>> import pyarrow.parquet.encryption as pe
+    >>> table = pa.table({'col': [1, 2, 3]})
+    >>> props = pe.create_encryption_properties(
+    ...     footer_key=b'0123456789abcdef',
+    ...     aad_prefix=b'table_id',
+    ...     store_aad_prefix=False,
+    ... )
+    >>> pq.write_table(table, 'encrypted.parquet', encryption_properties=props)
+    """
+    cdef:
+        c_string c_footer_key_str
+        CSecureString c_footer_key
+        CFileEncryptionPropertiesBuilder* builder
+        shared_ptr[CFileEncryptionProperties] props
+        ParquetCipher cipher
+
+    if not isinstance(footer_key, bytes):
+        raise TypeError(
+            f"footer_key must be bytes, not {type(footer_key).__name__}"
+        )
+    if len(footer_key) not in (16, 24, 32):
+        raise ValueError(
+            f"footer_key must be 16, 24, or 32 bytes, got {len(footer_key)}"
+        )
+
+    cipher = cipher_from_name(encryption_algorithm)
+    c_footer_key_str = <c_string>footer_key
+    c_footer_key = CSecureString(move(c_footer_key_str))
+    builder = new CFileEncryptionPropertiesBuilder(c_footer_key)
+    try:
+        builder.algorithm(cipher)
+
+        if aad_prefix is not None:
+            if not isinstance(aad_prefix, bytes):
+                raise TypeError(
+                    f"aad_prefix must be bytes, not {type(aad_prefix).__name__}"
+                )
+            builder.aad_prefix(<c_string>aad_prefix)
+            if not store_aad_prefix:
+                builder.disable_aad_prefix_storage()
+
+        if plaintext_footer:
+            builder.set_plaintext_footer()
+
+        props = builder.build()
+    finally:
+        del builder
+
+    return FileEncryptionProperties.wrap(props)
diff --git a/python/pyarrow/includes/libparquet.pxd b/python/pyarrow/includes/libparquet.pxd
index a834bd5dfa0..df353cc7805 100644
--- a/python/pyarrow/includes/libparquet.pxd
+++ b/python/pyarrow/includes/libparquet.pxd
@@ -22,7 +22,8 @@ from pyarrow.includes.libarrow cimport (Type, CChunkedArray, CScalar, CSchema,
                                         CStatus, CTable, CMemoryPool, CBuffer,
                                         CKeyValueMetadata, CRandomAccessFile,
                                         COutputStream, CCacheOptions,
-                                        TimeUnit, CRecordBatchReader)
+                                        TimeUnit, CRecordBatchReader,
+                                        CSecureString)
 
 
 cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:
@@ -635,6 +636,28 @@ cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil:
             " parquet::FileDecryptionProperties":
         pass
 
+    cdef cppclass CFileDecryptionPropertiesBuilder\
+            " parquet::FileDecryptionProperties::Builder":
+        CFileDecryptionPropertiesBuilder() except +
+        CFileDecryptionPropertiesBuilder* footer_key(
+            CSecureString footer_key) except +
+        CFileDecryptionPropertiesBuilder* aad_prefix(
+            c_string aad_prefix) except +
+        CFileDecryptionPropertiesBuilder* disable_footer_signature_verification() except +
+        CFileDecryptionPropertiesBuilder* plaintext_files_allowed() except +
+        shared_ptr[CFileDecryptionProperties] build() except +
+
     cdef cppclass CFileEncryptionProperties\
             " parquet::FileEncryptionProperties":
         pass
+
+    cdef cppclass CFileEncryptionPropertiesBuilder\
+            " parquet::FileEncryptionProperties::Builder":
+        CFileEncryptionPropertiesBuilder(CSecureString footer_key) except +
+        CFileEncryptionPropertiesBuilder* set_plaintext_footer() except +
+        CFileEncryptionPropertiesBuilder* algorithm(
+            ParquetCipher parquet_cipher) except +
+        CFileEncryptionPropertiesBuilder* aad_prefix(
+            c_string aad_prefix) except +
+        CFileEncryptionPropertiesBuilder* disable_aad_prefix_storage() except +
+        shared_ptr[CFileEncryptionProperties] build() except +
diff --git a/python/pyarrow/parquet/encryption.py b/python/pyarrow/parquet/encryption.py
index df6eed913fa..ce95e5d4507 100644
--- a/python/pyarrow/parquet/encryption.py
+++ b/python/pyarrow/parquet/encryption.py
@@ -20,4 +20,6 @@ from pyarrow._parquet_encryption import (CryptoFactory,   # noqa
                                          EncryptionConfiguration,
                                          DecryptionConfiguration,
                                          KmsConnectionConfig,
-                                         KmsClient)
+                                         KmsClient,
+                                         create_encryption_properties,
+                                         create_decryption_properties)
diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py
index 4e2fb069bd0..6a3842f3edf 100644
--- a/python/pyarrow/tests/parquet/test_encryption.py
+++ b/python/pyarrow/tests/parquet/test_encryption.py
@@ -37,6 +37,11 @@ FOOTER_KEY_NAME = "footer_key"
 COL_KEY = b"1234567890123450"
 COL_KEY_NAME = "col_key"
 
+DIRECT_KEY_128 = b"0123456789abcdef"
+DIRECT_KEY_192 = b"0123456789abcdef01234567"
+DIRECT_KEY_256 = b"0123456789abcdef0123456789abcdef"
+DIRECT_AAD_PREFIX = b"test_aad_prefix"
+
 
 # Marks all of the tests in this module
 # Ignore these with pytest ... -m 'not parquet_encryption'
@@ -722,3 +727,224 @@ def test_encrypted_parquet_read_table(tempdir, data_table, basic_encryption_conf
     result_table = pq.read_table(
         tempdir, decryption_properties=file_decryption_properties)
     assert data_table.equals(result_table)
+
+
+class TestDirectKeyEncryption:
+    """Tests for create_encryption_properties / create_decryption_properties."""
+
+    @pytest.mark.parametrize("key", [
+        DIRECT_KEY_128, DIRECT_KEY_192, DIRECT_KEY_256,
+    ], ids=["aes128", "aes192", "aes256"])
+    def test_roundtrip_key_sizes(self, tempdir, data_table, key):
+        path = tempdir / f"direct_{len(key) * 8}.parquet"
+
+        enc_props = pe.create_encryption_properties(footer_key=key)
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        dec_props = pe.create_decryption_properties(footer_key=key)
+        result = pq.read_table(path, decryption_properties=dec_props)
+        assert data_table.equals(result)
+
+    def test_roundtrip_with_aad_prefix(self, tempdir, data_table):
+        path = tempdir / "direct_aad.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128,
+            aad_prefix=DIRECT_AAD_PREFIX,
+        )
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128,
+            aad_prefix=DIRECT_AAD_PREFIX,
+        )
+        result = pq.read_table(path, decryption_properties=dec_props)
+        assert data_table.equals(result)
+
+    def test_roundtrip_aad_prefix_not_stored(self, tempdir, data_table):
+        """When store_aad_prefix=False, reader must supply aad_prefix."""
+        path = tempdir / "direct_aad_not_stored.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128,
+            aad_prefix=DIRECT_AAD_PREFIX,
+            store_aad_prefix=False,
+        )
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        # Reading without aad_prefix should fail
+        dec_props_no_aad = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128,
+        )
+        with pytest.raises(IOError, match="AAD"):
+            pq.read_table(path, decryption_properties=dec_props_no_aad)
+
+        # Reading with correct aad_prefix should succeed
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128,
+            aad_prefix=DIRECT_AAD_PREFIX,
+        )
+        result = pq.read_table(path, decryption_properties=dec_props)
+        assert data_table.equals(result)
+
+    def test_wrong_aad_prefix_fails(self, tempdir, data_table):
+        path = tempdir / "direct_wrong_aad.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128,
+            aad_prefix=DIRECT_AAD_PREFIX,
+        )
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128,
+            aad_prefix=b"wrong_prefix",
+        )
+        with pytest.raises(IOError, match="AAD"):
+            pq.read_table(path, decryption_properties=dec_props)
+
+    def test_encrypted_file_has_pare_magic(self, tempdir, data_table):
+        path = tempdir / "direct_magic.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128)
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        with open(path, "rb") as f:
+            magic = f.read(4)
+        assert magic == b"PARE"
+
+    def test_plaintext_footer(self, tempdir, data_table):
+        path = tempdir / "direct_plaintext_footer.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128,
+            plaintext_footer=True,
+        )
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128)
+        result = pq.read_table(path, decryption_properties=dec_props)
+        assert data_table.equals(result)
+
+    def test_aes_gcm_ctr_v1_algorithm(self, tempdir, data_table):
+        path = tempdir / "direct_ctr.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128,
+            encryption_algorithm="AES_GCM_CTR_V1",
+        )
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128)
+        result = pq.read_table(path, decryption_properties=dec_props)
+        assert data_table.equals(result)
+
+    def test_wrong_key_fails(self, tempdir, data_table):
+        path = tempdir / "direct_wrong_key.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128)
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        wrong_key = b"fedcba9876543210"
+        dec_props = pe.create_decryption_properties(footer_key=wrong_key)
+        with pytest.raises(IOError, match="decrypt"):
+            pq.read_table(path, decryption_properties=dec_props)
+
+    def test_reading_without_decryption_fails(self, tempdir, data_table):
+        path = tempdir / "direct_no_decrypt.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128)
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        with pytest.raises(IOError, match="encrypted metadata"):
+            pq.read_table(path)
+
+    def test_allow_plaintext_files(self, tempdir, data_table):
+        """Plaintext file reads should work when allow_plaintext_files=True."""
+        path = tempdir / "plaintext.parquet"
+        pq.write_table(data_table, path)
+
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128,
+            allow_plaintext_files=True,
+        )
+        result = pq.read_table(path, decryption_properties=dec_props)
+        assert data_table.equals(result)
+
+    def test_plaintext_file_rejected_by_default(self, tempdir, data_table):
+        """Default allow_plaintext_files=False should reject plaintext files."""
+        path = tempdir / "plaintext_rejected.parquet"
+        pq.write_table(data_table, path)
+
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128)
+        with pytest.raises(IOError, match="plaintext"):
+            pq.read_table(path, decryption_properties=dec_props)
+
+    def test_check_footer_integrity_false(self, tempdir, data_table):
+        """check_footer_integrity=False should still allow decryption."""
+        path = tempdir / "direct_no_footer_check.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128)
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        dec_props = pe.create_decryption_properties(
+            footer_key=DIRECT_KEY_128,
+            check_footer_integrity=False,
+        )
+        result = pq.read_table(path, decryption_properties=dec_props)
+        assert data_table.equals(result)
+
+    def test_plaintext_footer_has_par1_magic(self, tempdir, data_table):
+        """plaintext_footer=True should produce PAR1 magic, not PARE."""
+        path = tempdir / "direct_plaintext_magic.parquet"
+
+        enc_props = pe.create_encryption_properties(
+            footer_key=DIRECT_KEY_128,
+            plaintext_footer=True,
+        )
+        pq.write_table(data_table, path, encryption_properties=enc_props)
+
+        with open(path, "rb") as f:
+            magic = f.read(4)
+        assert magic == b"PAR1"
+
+    def test_invalid_key_length_raises(self):
+        with pytest.raises(ValueError, match="16, 24, or 32 bytes"):
+            pe.create_encryption_properties(footer_key=b"short")
+
+        with pytest.raises(ValueError, match="16, 24, or 32 bytes"):
+            pe.create_encryption_properties(footer_key=b"")
+
+        with pytest.raises(ValueError, match="16, 24, or 32 bytes"):
+            pe.create_decryption_properties(footer_key=b"short")
+
+    def test_invalid_algorithm_raises(self):
+        with pytest.raises(ValueError, match="Invalid cipher name"):
+            pe.create_encryption_properties(
+                footer_key=DIRECT_KEY_128,
+                encryption_algorithm="INVALID",
+            )
+
+    def test_footer_key_rejects_non_bytes(self):
+        with pytest.raises(TypeError, match="footer_key must be bytes"):
+            pe.create_encryption_properties(footer_key="0123456789abcdef")
+
+        with pytest.raises(TypeError, match="footer_key must be bytes"):
+            pe.create_decryption_properties(footer_key="0123456789abcdef")
+
+        with pytest.raises(TypeError, match="footer_key must be bytes"):
+            pe.create_encryption_properties(footer_key=None)
+
+    def test_aad_prefix_rejects_str(self, tempdir, data_table):
+        with pytest.raises(TypeError, match="aad_prefix must be bytes"):
+            pe.create_encryption_properties(
+                footer_key=DIRECT_KEY_128,
+                aad_prefix="not_bytes",
+            )

Reply via email to