tcrasset commented on PR #39216:
URL: https://github.com/apache/arrow/pull/39216#issuecomment-1854183160
One of my tests is quite flaky: it passes 80% of the time, and I don't know
how to debug it. The error message is not always the same:
```text
python -m pytest arrow/python/pyarrow/tests/test_dataset_encryption.py -k
test_dataset_encryption_decryption
======================================== test session starts
=========================================
platform linux -- Python 3.8.10, pytest-7.4.3, pluggy-1.3.0
rootdir: /home/tom/Documents/dev/arrow_dev/arrow/python
configfile: setup.cfg
plugins: hypothesis-6.92.0, lazy-fixture-0.6.3
collected 3 items / 1 deselected / 2 selected
arrow/python/pyarrow/tests/test_dataset_encryption.py .F
[100%]
============================================== FAILURES
==============================================
_______________________
test_dataset_encryption_decryption[uniform_encryption] _______________________
encryption_config = <pyarrow._parquet_encryption.EncryptionConfiguration
object at 0x7fa36a03f270>
@pytest.mark.skipif(
encryption_unavailable, reason="Parquet Encryption is not currently
enabled"
)
@pytest.mark.parametrize(
"encryption_config",
[
create_encryption_config(),
create_uniform_encryption_config(),
],
ids=["column_keys", "uniform_encryption"],
)
def test_dataset_encryption_decryption(encryption_config):
table = create_sample_table()
decryption_config = create_decryption_config()
kms_connection_config = create_kms_connection_config()
crypto_factory = pe.CryptoFactory(kms_factory)
parquet_encryption_cfg = ds.ParquetEncryptionConfig(
crypto_factory, kms_connection_config, encryption_config
)
parquet_decryption_cfg = ds.ParquetDecryptionConfig(
crypto_factory, kms_connection_config, decryption_config
)
# create write_options with dataset encryption config
pformat = pa.dataset.ParquetFileFormat()
write_options =
pformat.make_write_options(encryption_config=parquet_encryption_cfg)
mockfs = fs._MockFileSystem()
mockfs.create_dir("/")
ds.write_dataset(
data=table,
base_dir="sample_dataset",
format=pformat,
file_options=write_options,
filesystem=mockfs,
)
# read without decryption config -> should error is dataset was
properly encrypted
pformat = pa.dataset.ParquetFileFormat()
with pytest.raises(IOError, match=r"no decryption"):
ds.dataset("sample_dataset", format=pformat, filesystem=mockfs)
# set decryption config for parquet fragment scan options
pq_scan_opts = ds.ParquetFragmentScanOptions(
decryption_config=parquet_decryption_cfg
)
pformat =
pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts)
dataset = ds.dataset("sample_dataset", format=pformat,
filesystem=mockfs)
> assert table.equals(dataset.to_table())
arrow/python/pyarrow/tests/test_dataset_encryption.py:158:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _
pyarrow/_dataset.pyx:562: in pyarrow._dataset.Dataset.to_table
???
pyarrow/_dataset.pyx:3722: in pyarrow._dataset.Scanner.to_table
???
pyarrow/error.pxi:154: in pyarrow.lib.pyarrow_internal_check_status
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _
> ???
E OSError: Failed decryption finalization
pyarrow/error.pxi:91: OSError
====================================== short test summary info
=======================================
FAILED
arrow/python/pyarrow/tests/test_dataset_encryption.py::test_dataset_encryption_decryption[uniform_encryption]
- OSError: Failed decryption finalization
============================= 1 failed, 1 passed, 1 deselected in 0.07s
==============================
```
```text
❯ python -m pytest arrow/python/pyarrow/tests/test_dataset_encryption.py -k
test_dataset_encryption_decryption
======================================== test session starts
=========================================
platform linux -- Python 3.8.10, pytest-7.4.3, pluggy-1.3.0
rootdir: /home/tom/Documents/dev/arrow_dev/arrow/python
configfile: setup.cfg
plugins: hypothesis-6.92.0, lazy-fixture-0.6.3
collected 3 items / 1 deselected / 2 selected
arrow/python/pyarrow/tests/test_dataset_encryption.py .Fatal Python error:
Segmentation fault
Thread 0x00007f894e968740 (most recent call first):
File
"/home/tom/Documents/dev/arrow_dev/arrow/python/pyarrow/tests/test_dataset_encryption.py",
line 158 in test_dataset_encryption_decryption
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/python.py",
line 194 in pytest_pyfunc_call
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
line 77 in _multicall
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
line 115 in _hookexec
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
line 493 in __call__
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/python.py",
line 1792 in runtest
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
line 169 in pytest_runtest_call
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
line 77 in _multicall
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
line 115 in _hookexec
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
line 493 in __call__
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
line 262 in <lambda>
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
line 341 in from_call
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
line 261 in call_runtest_hook
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
line 222 in call_and_report
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
line 133 in runtestprotocol
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
line 114 in pytest_runtest_protocol
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
line 77 in _multicall
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
line 115 in _hookexec
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
line 493 in __call__
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
line 350 in pytest_runtestloop
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
line 77 in _multicall
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
line 115 in _hookexec
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
line 493 in __call__
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
line 325 in _main
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
line 271 in wrap_session
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
line 318 in pytest_cmdline_main
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
line 77 in _multicall
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
line 115 in _hookexec
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
line 493 in __call__
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/config/__init__.py",
line 169 in main
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/config/__init__.py",
line 192 in console_main
File
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pytest/__main__.py",
line 5 in <module>
File "/usr/lib/python3.8/runpy.py", line 87 in _run_code
File "/usr/lib/python3.8/runpy.py", line 194 in _run_module_as_main
[1] 240505 segmentation fault (core dumped) python -m pytest
arrow/python/pyarrow/tests/test_dataset_encryption.py -k
```
```text
❯ python -m pytest arrow/python/pyarrow/tests/test_dataset_encryption.py -k
test_dataset_encryption_decryption
======================================== test session starts
=========================================
platform linux -- Python 3.8.10, pytest-7.4.3, pluggy-1.3.0
rootdir: /home/tom/Documents/dev/arrow_dev/arrow/python
configfile: setup.cfg
plugins: hypothesis-6.92.0, lazy-fixture-0.6.3
collected 3 items / 1 deselected / 2 selected
arrow/python/pyarrow/tests/test_dataset_encryption.py .F
[100%]
============================================== FAILURES
==============================================
_______________________
test_dataset_encryption_decryption[uniform_encryption] _______________________
encryption_config = <pyarrow._parquet_encryption.EncryptionConfiguration
object at 0x7f7bc6b8a300>
@pytest.mark.skipif(
encryption_unavailable, reason="Parquet Encryption is not currently
enabled"
)
@pytest.mark.parametrize(
"encryption_config",
[
create_encryption_config(),
create_uniform_encryption_config(),
],
ids=["column_keys", "uniform_encryption"],
)
def test_dataset_encryption_decryption(encryption_config):
table = create_sample_table()
decryption_config = create_decryption_config()
kms_connection_config = create_kms_connection_config()
crypto_factory = pe.CryptoFactory(kms_factory)
parquet_encryption_cfg = ds.ParquetEncryptionConfig(
crypto_factory, kms_connection_config, encryption_config
)
parquet_decryption_cfg = ds.ParquetDecryptionConfig(
crypto_factory, kms_connection_config, decryption_config
)
# create write_options with dataset encryption config
pformat = pa.dataset.ParquetFileFormat()
write_options =
pformat.make_write_options(encryption_config=parquet_encryption_cfg)
mockfs = fs._MockFileSystem()
mockfs.create_dir("/")
ds.write_dataset(
data=table,
base_dir="sample_dataset",
format=pformat,
file_options=write_options,
filesystem=mockfs,
)
# read without decryption config -> should error is dataset was
properly encrypted
pformat = pa.dataset.ParquetFileFormat()
with pytest.raises(IOError, match=r"no decryption"):
ds.dataset("sample_dataset", format=pformat, filesystem=mockfs)
# set decryption config for parquet fragment scan options
pq_scan_opts = ds.ParquetFragmentScanOptions(
decryption_config=parquet_decryption_cfg
)
pformat =
pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts)
dataset = ds.dataset("sample_dataset", format=pformat,
filesystem=mockfs)
> assert table.equals(dataset.to_table())
arrow/python/pyarrow/tests/test_dataset_encryption.py:158:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _
pyarrow/_dataset.pyx:562: in pyarrow._dataset.Dataset.to_table
???
pyarrow/_dataset.pyx:3722: in pyarrow._dataset.Scanner.to_table
???
pyarrow/error.pxi:154: in pyarrow.lib.pyarrow_internal_check_status
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _ _ _ _ _ _ _ _ _ _ _ _
> ???
E OSError: Failed decryption finalizationDeserializing page header failed.
pyarrow/error.pxi:91: OSError
====================================== short test summary info
=======================================
FAILED
arrow/python/pyarrow/tests/test_dataset_encryption.py::test_dataset_encryption_decryption[uniform_encryption]
- OSError: Failed decryption finalizationDeserializing page header failed.
============================= 1 failed, 1 passed, 1 deselected in 0.07s
==============================
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]