tcrasset commented on PR #39216:
URL: https://github.com/apache/arrow/pull/39216#issuecomment-1854183160

   One of my tests is quite flaky: it passes 80% of the time, and I don't know 
how to debug it. The error message is not always the same:
   
   
   ```text
   python -m pytest arrow/python/pyarrow/tests/test_dataset_encryption.py -k 
test_dataset_encryption_decryption
   ======================================== test session starts 
=========================================
   platform linux -- Python 3.8.10, pytest-7.4.3, pluggy-1.3.0
   rootdir: /home/tom/Documents/dev/arrow_dev/arrow/python
   configfile: setup.cfg
   plugins: hypothesis-6.92.0, lazy-fixture-0.6.3
   collected 3 items / 1 deselected / 2 selected                                
                        
   
   arrow/python/pyarrow/tests/test_dataset_encryption.py .F                     
                  [100%]
   
   ============================================== FAILURES 
==============================================
   _______________________ 
test_dataset_encryption_decryption[uniform_encryption] _______________________
   
   encryption_config = <pyarrow._parquet_encryption.EncryptionConfiguration 
object at 0x7fa36a03f270>
   
       @pytest.mark.skipif(
           encryption_unavailable, reason="Parquet Encryption is not currently 
enabled"
       )
       @pytest.mark.parametrize(
           "encryption_config",
           [
               create_encryption_config(),
               create_uniform_encryption_config(),
           ],
           ids=["column_keys", "uniform_encryption"],
       )
       def test_dataset_encryption_decryption(encryption_config):
           table = create_sample_table()
       
           decryption_config = create_decryption_config()
           kms_connection_config = create_kms_connection_config()
       
           crypto_factory = pe.CryptoFactory(kms_factory)
           parquet_encryption_cfg = ds.ParquetEncryptionConfig(
               crypto_factory, kms_connection_config, encryption_config
           )
           parquet_decryption_cfg = ds.ParquetDecryptionConfig(
               crypto_factory, kms_connection_config, decryption_config
           )
       
           # create write_options with dataset encryption config
           pformat = pa.dataset.ParquetFileFormat()
           write_options = 
pformat.make_write_options(encryption_config=parquet_encryption_cfg)
       
           mockfs = fs._MockFileSystem()
           mockfs.create_dir("/")
       
           ds.write_dataset(
               data=table,
               base_dir="sample_dataset",
               format=pformat,
               file_options=write_options,
               filesystem=mockfs,
           )
       
           # read without decryption config -> should error is dataset was 
properly encrypted
           pformat = pa.dataset.ParquetFileFormat()
           with pytest.raises(IOError, match=r"no decryption"):
               ds.dataset("sample_dataset", format=pformat, filesystem=mockfs)
       
           # set decryption config for parquet fragment scan options
           pq_scan_opts = ds.ParquetFragmentScanOptions(
               decryption_config=parquet_decryption_cfg
           )
           pformat = 
pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts)
           dataset = ds.dataset("sample_dataset", format=pformat, 
filesystem=mockfs)
       
   >       assert table.equals(dataset.to_table())
   
   arrow/python/pyarrow/tests/test_dataset_encryption.py:158: 
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _ _ _ _ _ _ _ _ _ _ 
   pyarrow/_dataset.pyx:562: in pyarrow._dataset.Dataset.to_table
       ???
   pyarrow/_dataset.pyx:3722: in pyarrow._dataset.Scanner.to_table
       ???
   pyarrow/error.pxi:154: in pyarrow.lib.pyarrow_internal_check_status
       ???
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _ _ _ _ _ _ _ _ _ _ 
   
   >   ???
   E   OSError: Failed decryption finalization
   
   pyarrow/error.pxi:91: OSError
   ====================================== short test summary info 
=======================================
   FAILED 
arrow/python/pyarrow/tests/test_dataset_encryption.py::test_dataset_encryption_decryption[uniform_encryption]
 - OSError: Failed decryption finalization
   ============================= 1 failed, 1 passed, 1 deselected in 0.07s 
==============================
   ```
   
   ```text
   ❯ python -m pytest arrow/python/pyarrow/tests/test_dataset_encryption.py -k 
test_dataset_encryption_decryption
   ======================================== test session starts 
=========================================
   platform linux -- Python 3.8.10, pytest-7.4.3, pluggy-1.3.0
   rootdir: /home/tom/Documents/dev/arrow_dev/arrow/python
   configfile: setup.cfg
   plugins: hypothesis-6.92.0, lazy-fixture-0.6.3
   collected 3 items / 1 deselected / 2 selected                                
                        
   
   arrow/python/pyarrow/tests/test_dataset_encryption.py .Fatal Python error: 
Segmentation fault
   
   Thread 0x00007f894e968740 (most recent call first):
     File 
"/home/tom/Documents/dev/arrow_dev/arrow/python/pyarrow/tests/test_dataset_encryption.py",
 line 158 in test_dataset_encryption_decryption
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/python.py",
 line 194 in pytest_pyfunc_call
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
 line 77 in _multicall
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
 line 115 in _hookexec
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
 line 493 in __call__
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/python.py",
 line 1792 in runtest
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
 line 169 in pytest_runtest_call
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
 line 77 in _multicall
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
 line 115 in _hookexec
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
 line 493 in __call__
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
 line 262 in <lambda>
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
 line 341 in from_call
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
 line 261 in call_runtest_hook
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
 line 222 in call_and_report
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
 line 133 in runtestprotocol
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/runner.py",
 line 114 in pytest_runtest_protocol
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
 line 77 in _multicall
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
 line 115 in _hookexec
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
 line 493 in __call__
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
 line 350 in pytest_runtestloop
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
 line 77 in _multicall
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
 line 115 in _hookexec
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
 line 493 in __call__
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
 line 325 in _main
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
 line 271 in wrap_session
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/main.py",
 line 318 in pytest_cmdline_main
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_callers.py",
 line 77 in _multicall
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_manager.py",
 line 115 in _hookexec
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pluggy/_hooks.py",
 line 493 in __call__
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/config/__init__.py",
 line 169 in main
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/_pytest/config/__init__.py",
 line 192 in console_main
     File 
"/home/tom/Documents/dev/arrow_dev/venv/lib/python3.8/site-packages/pytest/__main__.py",
 line 5 in <module>
     File "/usr/lib/python3.8/runpy.py", line 87 in _run_code
     File "/usr/lib/python3.8/runpy.py", line 194 in _run_module_as_main
   ^[[A
   [1]    240505 segmentation fault (core dumped)  python -m pytest 
arrow/python/pyarrow/tests/test_dataset_encryption.py -k 
   ```
   
   
   ```text
   ❯ python -m pytest arrow/python/pyarrow/tests/test_dataset_encryption.py -k 
test_dataset_encryption_decryption
   ======================================== test session starts 
=========================================
   platform linux -- Python 3.8.10, pytest-7.4.3, pluggy-1.3.0
   rootdir: /home/tom/Documents/dev/arrow_dev/arrow/python
   configfile: setup.cfg
   plugins: hypothesis-6.92.0, lazy-fixture-0.6.3
   collected 3 items / 1 deselected / 2 selected                                
                        
   
   arrow/python/pyarrow/tests/test_dataset_encryption.py .F                     
                  [100%]
   
   ============================================== FAILURES 
==============================================
   _______________________ 
test_dataset_encryption_decryption[uniform_encryption] _______________________
   
   encryption_config = <pyarrow._parquet_encryption.EncryptionConfiguration 
object at 0x7f7bc6b8a300>
   
       @pytest.mark.skipif(
           encryption_unavailable, reason="Parquet Encryption is not currently 
enabled"
       )
       @pytest.mark.parametrize(
           "encryption_config",
           [
               create_encryption_config(),
               create_uniform_encryption_config(),
           ],
           ids=["column_keys", "uniform_encryption"],
       )
       def test_dataset_encryption_decryption(encryption_config):
           table = create_sample_table()
       
           decryption_config = create_decryption_config()
           kms_connection_config = create_kms_connection_config()
       
           crypto_factory = pe.CryptoFactory(kms_factory)
           parquet_encryption_cfg = ds.ParquetEncryptionConfig(
               crypto_factory, kms_connection_config, encryption_config
           )
           parquet_decryption_cfg = ds.ParquetDecryptionConfig(
               crypto_factory, kms_connection_config, decryption_config
           )
       
           # create write_options with dataset encryption config
           pformat = pa.dataset.ParquetFileFormat()
           write_options = 
pformat.make_write_options(encryption_config=parquet_encryption_cfg)
       
           mockfs = fs._MockFileSystem()
           mockfs.create_dir("/")
       
           ds.write_dataset(
               data=table,
               base_dir="sample_dataset",
               format=pformat,
               file_options=write_options,
               filesystem=mockfs,
           )
       
           # read without decryption config -> should error is dataset was 
properly encrypted
           pformat = pa.dataset.ParquetFileFormat()
           with pytest.raises(IOError, match=r"no decryption"):
               ds.dataset("sample_dataset", format=pformat, filesystem=mockfs)
       
           # set decryption config for parquet fragment scan options
           pq_scan_opts = ds.ParquetFragmentScanOptions(
               decryption_config=parquet_decryption_cfg
           )
           pformat = 
pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts)
           dataset = ds.dataset("sample_dataset", format=pformat, 
filesystem=mockfs)
       
   >       assert table.equals(dataset.to_table())
   
   arrow/python/pyarrow/tests/test_dataset_encryption.py:158: 
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _ _ _ _ _ _ _ _ _ _ 
   pyarrow/_dataset.pyx:562: in pyarrow._dataset.Dataset.to_table
       ???
   pyarrow/_dataset.pyx:3722: in pyarrow._dataset.Scanner.to_table
       ???
   pyarrow/error.pxi:154: in pyarrow.lib.pyarrow_internal_check_status
       ???
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _ _ _ _ _ _ _ _ _ _ 
   
   >   ???
   E   OSError: Failed decryption finalizationDeserializing page header failed.
   
   pyarrow/error.pxi:91: OSError
   ====================================== short test summary info 
=======================================
   FAILED 
arrow/python/pyarrow/tests/test_dataset_encryption.py::test_dataset_encryption_decryption[uniform_encryption]
 - OSError: Failed decryption finalizationDeserializing page header failed.
   ============================= 1 failed, 1 passed, 1 deselected in 0.07s 
==============================
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to