This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b71f629ec8 GH-46349: [Python] Move parquet definitions to 
pyarrow/includes/libparquet.pxd (#46437)
b71f629ec8 is described below

commit b71f629ec868fc8d7bd1fadb680b1c78a07eecd4
Author: Raúl Cumplido <[email protected]>
AuthorDate: Thu May 15 10:28:22 2025 +0200

    GH-46349: [Python] Move parquet definitions to 
pyarrow/includes/libparquet.pxd (#46437)
    
    ### Rationale for this change
    
    Follow the convention we have for Arrow C++ API declarations, which are 
stored in pyarrow/includes/libxxx.pxd, with the specific name depending on the 
C++ component (for example pyarrow/includes/libarrow_dataset.pxd).
    This follows the architecture defined at:
    
https://arrow.apache.org/docs/python/getting_involved.html#pyarrow-architecture
    
    ### What changes are included in this PR?
    
    Move the Parquet C++ API declarations to `pyarrow/includes/libparquet.pxd`.
    
    ### Are these changes tested?
    
    Yes, via CI and the archery extended Python jobs.
    
    ### Are there any user-facing changes?
    
    No
    
    * GitHub Issue: #46349
    
    Authored-by: Raúl Cumplido <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 python/pyarrow/_parquet.pxd                        | 609 +--------------------
 .../{_parquet.pxd => includes/libparquet.pxd}      | 126 -----
 2 files changed, 1 insertion(+), 734 deletions(-)

diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index 7095b35a2c..1ef575d18f 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -18,608 +18,10 @@
 # distutils: language = c++
 # cython: language_level = 3
 
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport (CChunkedArray, CScalar, CSchema, 
CStatus,
-                                        CTable, CMemoryPool, CBuffer,
-                                        CKeyValueMetadata, CRandomAccessFile,
-                                        COutputStream, CCacheOptions,
-                                        TimeUnit, CRecordBatchReader)
+from pyarrow.includes.libparquet cimport *
 from pyarrow.lib cimport _Weakrefable
 
 
-cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:
-    cdef cppclass Node:
-        pass
-
-    cdef cppclass GroupNode(Node):
-        pass
-
-    cdef cppclass PrimitiveNode(Node):
-        pass
-
-    cdef cppclass ColumnPath:
-        c_string ToDotString()
-        vector[c_string] ToDotVector()
-
-
-cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
-    enum ParquetType" parquet::Type::type":
-        ParquetType_BOOLEAN" parquet::Type::BOOLEAN"
-        ParquetType_INT32" parquet::Type::INT32"
-        ParquetType_INT64" parquet::Type::INT64"
-        ParquetType_INT96" parquet::Type::INT96"
-        ParquetType_FLOAT" parquet::Type::FLOAT"
-        ParquetType_DOUBLE" parquet::Type::DOUBLE"
-        ParquetType_BYTE_ARRAY" parquet::Type::BYTE_ARRAY"
-        ParquetType_FIXED_LEN_BYTE_ARRAY" parquet::Type::FIXED_LEN_BYTE_ARRAY"
-
-    enum ParquetLogicalTypeId" parquet::LogicalType::Type::type":
-        ParquetLogicalType_UNDEFINED" parquet::LogicalType::Type::UNDEFINED"
-        ParquetLogicalType_STRING" parquet::LogicalType::Type::STRING"
-        ParquetLogicalType_MAP" parquet::LogicalType::Type::MAP"
-        ParquetLogicalType_LIST" parquet::LogicalType::Type::LIST"
-        ParquetLogicalType_ENUM" parquet::LogicalType::Type::ENUM"
-        ParquetLogicalType_DECIMAL" parquet::LogicalType::Type::DECIMAL"
-        ParquetLogicalType_DATE" parquet::LogicalType::Type::DATE"
-        ParquetLogicalType_TIME" parquet::LogicalType::Type::TIME"
-        ParquetLogicalType_TIMESTAMP" parquet::LogicalType::Type::TIMESTAMP"
-        ParquetLogicalType_INT" parquet::LogicalType::Type::INT"
-        ParquetLogicalType_FLOAT16" parquet::LogicalType::Type::FLOAT16"
-        ParquetLogicalType_JSON" parquet::LogicalType::Type::JSON"
-        ParquetLogicalType_BSON" parquet::LogicalType::Type::BSON"
-        ParquetLogicalType_UUID" parquet::LogicalType::Type::UUID"
-        ParquetLogicalType_GEOMETRY" parquet::LogicalType::Type::GEOMETRY"
-        ParquetLogicalType_GEOGRAPHY" parquet::LogicalType::Type::GEOGRAPHY"
-        ParquetLogicalType_NONE" parquet::LogicalType::Type::NONE"
-
-    enum ParquetTimeUnit" parquet::LogicalType::TimeUnit::unit":
-        ParquetTimeUnit_UNKNOWN" parquet::LogicalType::TimeUnit::UNKNOWN"
-        ParquetTimeUnit_MILLIS" parquet::LogicalType::TimeUnit::MILLIS"
-        ParquetTimeUnit_MICROS" parquet::LogicalType::TimeUnit::MICROS"
-        ParquetTimeUnit_NANOS" parquet::LogicalType::TimeUnit::NANOS"
-
-    enum ParquetEdgeInterpolationAlgorithm" 
parquet::LogicalType::EdgeInterpolationAlgorithm":
-        ParquetEdgeInterpolationAlgorithm_UNKNOWN" 
parquet::LogicalType::EdgeInterpolationAlgorithm::UNKNOWN"
-        ParquetEdgeInterpolationAlgorithm_SPHERICAL" 
parquet::LogicalType::EdgeInterpolationAlgorithm::SPHERICAL"
-        ParquetEdgeInterpolationAlgorithm_VINCENTY" 
parquet::LogicalType::EdgeInterpolationAlgorithm::VINCENTY"
-        ParquetEdgeInterpolationAlgorithm_THOMAS" 
parquet::LogicalType::EdgeInterpolationAlgorithm::THOMAS"
-        ParquetEdgeInterpolationAlgorithm_ANDOYER" 
parquet::LogicalType::EdgeInterpolationAlgorithm::ANDOYER"
-        ParquetEdgeInterpolationAlgorithm_KARNEY" 
parquet::LogicalType::EdgeInterpolationAlgorithm::KARNEY"
-
-    enum ParquetConvertedType" parquet::ConvertedType::type":
-        ParquetConvertedType_NONE" parquet::ConvertedType::NONE"
-        ParquetConvertedType_UTF8" parquet::ConvertedType::UTF8"
-        ParquetConvertedType_MAP" parquet::ConvertedType::MAP"
-        ParquetConvertedType_MAP_KEY_VALUE \
-            " parquet::ConvertedType::MAP_KEY_VALUE"
-        ParquetConvertedType_LIST" parquet::ConvertedType::LIST"
-        ParquetConvertedType_ENUM" parquet::ConvertedType::ENUM"
-        ParquetConvertedType_DECIMAL" parquet::ConvertedType::DECIMAL"
-        ParquetConvertedType_DATE" parquet::ConvertedType::DATE"
-        ParquetConvertedType_TIME_MILLIS" parquet::ConvertedType::TIME_MILLIS"
-        ParquetConvertedType_TIME_MICROS" parquet::ConvertedType::TIME_MICROS"
-        ParquetConvertedType_TIMESTAMP_MILLIS \
-            " parquet::ConvertedType::TIMESTAMP_MILLIS"
-        ParquetConvertedType_TIMESTAMP_MICROS \
-            " parquet::ConvertedType::TIMESTAMP_MICROS"
-        ParquetConvertedType_UINT_8" parquet::ConvertedType::UINT_8"
-        ParquetConvertedType_UINT_16" parquet::ConvertedType::UINT_16"
-        ParquetConvertedType_UINT_32" parquet::ConvertedType::UINT_32"
-        ParquetConvertedType_UINT_64" parquet::ConvertedType::UINT_64"
-        ParquetConvertedType_INT_8" parquet::ConvertedType::INT_8"
-        ParquetConvertedType_INT_16" parquet::ConvertedType::INT_16"
-        ParquetConvertedType_INT_32" parquet::ConvertedType::INT_32"
-        ParquetConvertedType_INT_64" parquet::ConvertedType::INT_64"
-        ParquetConvertedType_JSON" parquet::ConvertedType::JSON"
-        ParquetConvertedType_BSON" parquet::ConvertedType::BSON"
-        ParquetConvertedType_INTERVAL" parquet::ConvertedType::INTERVAL"
-
-    enum ParquetRepetition" parquet::Repetition::type":
-        ParquetRepetition_REQUIRED" parquet::REPETITION::REQUIRED"
-        ParquetRepetition_OPTIONAL" parquet::REPETITION::OPTIONAL"
-        ParquetRepetition_REPEATED" parquet::REPETITION::REPEATED"
-
-    enum ParquetEncoding" parquet::Encoding::type":
-        ParquetEncoding_PLAIN" parquet::Encoding::PLAIN"
-        ParquetEncoding_PLAIN_DICTIONARY" parquet::Encoding::PLAIN_DICTIONARY"
-        ParquetEncoding_RLE" parquet::Encoding::RLE"
-        ParquetEncoding_BIT_PACKED" parquet::Encoding::BIT_PACKED"
-        ParquetEncoding_DELTA_BINARY_PACKED \
-            " parquet::Encoding::DELTA_BINARY_PACKED"
-        ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY \
-            " parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY"
-        ParquetEncoding_DELTA_BYTE_ARRAY" parquet::Encoding::DELTA_BYTE_ARRAY"
-        ParquetEncoding_RLE_DICTIONARY" parquet::Encoding::RLE_DICTIONARY"
-        ParquetEncoding_BYTE_STREAM_SPLIT \
-            " parquet::Encoding::BYTE_STREAM_SPLIT"
-
-    enum ParquetCompression" parquet::Compression::type":
-        ParquetCompression_UNCOMPRESSED" parquet::Compression::UNCOMPRESSED"
-        ParquetCompression_SNAPPY" parquet::Compression::SNAPPY"
-        ParquetCompression_GZIP" parquet::Compression::GZIP"
-        ParquetCompression_LZO" parquet::Compression::LZO"
-        ParquetCompression_BROTLI" parquet::Compression::BROTLI"
-        ParquetCompression_LZ4" parquet::Compression::LZ4"
-        ParquetCompression_ZSTD" parquet::Compression::ZSTD"
-
-    enum ParquetVersion" parquet::ParquetVersion::type":
-        ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
-        ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
-        ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"
-
-    enum ParquetSortOrder" parquet::SortOrder::type":
-        ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED"
-        ParquetSortOrder_UNSIGNED" parquet::SortOrder::UNSIGNED"
-        ParquetSortOrder_UNKNOWN" parquet::SortOrder::UNKNOWN"
-
-    cdef cppclass CParquetLogicalType" parquet::LogicalType":
-        c_string ToString() const
-        c_string ToJSON() const
-        ParquetLogicalTypeId type() const
-
-    cdef cppclass CParquetDecimalType \
-            " parquet::DecimalLogicalType"(CParquetLogicalType):
-        int32_t precision() const
-        int32_t scale() const
-
-    cdef cppclass CParquetIntType \
-            " parquet::IntLogicalType"(CParquetLogicalType):
-        int bit_width() const
-        c_bool is_signed() const
-
-    cdef cppclass CParquetTimeType \
-            " parquet::TimeLogicalType"(CParquetLogicalType):
-        c_bool is_adjusted_to_utc() const
-        ParquetTimeUnit time_unit() const
-
-    cdef cppclass CParquetTimestampType \
-            " parquet::TimestampLogicalType"(CParquetLogicalType):
-        c_bool is_adjusted_to_utc() const
-        ParquetTimeUnit time_unit() const
-
-    cdef cppclass CParquetGeometryType \
-            " parquet::GeometryLogicalType"(CParquetLogicalType):
-        c_string crs() const
-
-    cdef cppclass CParquetGeographyType \
-            " parquet::GeographyLogicalType"(CParquetLogicalType):
-        c_string crs() const
-        ParquetEdgeInterpolationAlgorithm algorithm() const
-
-    cdef cppclass ColumnDescriptor" parquet::ColumnDescriptor":
-        c_bool Equals(const ColumnDescriptor& other)
-
-        shared_ptr[ColumnPath] path()
-        int16_t max_definition_level()
-        int16_t max_repetition_level()
-
-        ParquetType physical_type()
-        const shared_ptr[const CParquetLogicalType]& logical_type()
-        ParquetConvertedType converted_type()
-        const c_string& name()
-        int type_length()
-        int type_precision()
-        int type_scale()
-
-    cdef cppclass SchemaDescriptor:
-        const ColumnDescriptor* Column(int i)
-        shared_ptr[Node] schema()
-        GroupNode* group()
-        c_bool Equals(const SchemaDescriptor& other)
-        c_string ToString()
-        int num_columns()
-
-    cdef c_string FormatStatValue(ParquetType parquet_type, c_string val)
-
-    enum ParquetCipher" parquet::ParquetCipher::type":
-        ParquetCipher_AES_GCM_V1" parquet::ParquetCipher::AES_GCM_V1"
-        ParquetCipher_AES_GCM_CTR_V1" parquet::ParquetCipher::AES_GCM_CTR_V1"
-
-    struct AadMetadata:
-        c_string aad_prefix
-        c_string aad_file_unique
-        c_bool supply_aad_prefix
-
-    struct EncryptionAlgorithm:
-        ParquetCipher algorithm
-        AadMetadata aad
-
-
-# Specific array<> types needed for GeoStatistics
-cdef extern from "<array>" namespace "std" nogil:
-    cdef cppclass double_array4 "std::array<double, 4>":
-        double_array4() except +
-        double& operator[](size_t)
-
-    cdef cppclass bool_array4 "std::array<bool, 4>":
-        bool_array4() except +
-        c_bool& operator[](size_t)
-
-
-cdef extern from "parquet/geospatial/statistics.h" namespace "parquet" nogil:
-
-    cdef cppclass CParquetGeoStatistics" parquet::geospatial::GeoStatistics":
-        c_bool is_valid() const
-
-        double_array4 lower_bound() const
-        double_array4 upper_bound() const
-        bool_array4 dimension_valid() const
-        bool_array4 dimension_empty() const
-
-        optional[vector[int32_t]] geometry_types() const
-
-        c_string ToString() const
-
-
-cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
-    cdef cppclass ColumnReader:
-        pass
-
-    cdef cppclass BoolReader(ColumnReader):
-        pass
-
-    cdef cppclass Int32Reader(ColumnReader):
-        pass
-
-    cdef cppclass Int64Reader(ColumnReader):
-        pass
-
-    cdef cppclass Int96Reader(ColumnReader):
-        pass
-
-    cdef cppclass FloatReader(ColumnReader):
-        pass
-
-    cdef cppclass DoubleReader(ColumnReader):
-        pass
-
-    cdef cppclass ByteArrayReader(ColumnReader):
-        pass
-
-    cdef cppclass RowGroupReader:
-        pass
-
-    cdef cppclass CEncodedStatistics" parquet::EncodedStatistics":
-        const c_string& max() const
-        const c_string& min() const
-        int64_t null_count
-        int64_t distinct_count
-        bint has_min
-        bint has_max
-        bint has_null_count
-        bint has_distinct_count
-
-    cdef cppclass ParquetByteArray" parquet::ByteArray":
-        uint32_t len
-        const uint8_t* ptr
-
-    cdef cppclass ParquetFLBA" parquet::FLBA":
-        const uint8_t* ptr
-
-    cdef cppclass CStatistics" parquet::Statistics":
-        int64_t null_count() const
-        int64_t distinct_count() const
-        int64_t num_values() const
-        bint HasMinMax()
-        bint HasNullCount()
-        bint HasDistinctCount()
-        c_bool Equals(const CStatistics&) const
-        void Reset()
-        c_string EncodeMin()
-        c_string EncodeMax()
-        CEncodedStatistics Encode()
-        void SetComparator()
-        ParquetType physical_type() const
-        const ColumnDescriptor* descr() const
-
-    cdef cppclass CBoolStatistics" parquet::BoolStatistics"(CStatistics):
-        c_bool min()
-        c_bool max()
-
-    cdef cppclass CInt32Statistics" parquet::Int32Statistics"(CStatistics):
-        int32_t min()
-        int32_t max()
-
-    cdef cppclass CInt64Statistics" parquet::Int64Statistics"(CStatistics):
-        int64_t min()
-        int64_t max()
-
-    cdef cppclass CFloatStatistics" parquet::FloatStatistics"(CStatistics):
-        float min()
-        float max()
-
-    cdef cppclass CDoubleStatistics" parquet::DoubleStatistics"(CStatistics):
-        double min()
-        double max()
-
-    cdef cppclass CByteArrayStatistics \
-            " parquet::ByteArrayStatistics"(CStatistics):
-        ParquetByteArray min()
-        ParquetByteArray max()
-
-    cdef cppclass CFLBAStatistics" parquet::FLBAStatistics"(CStatistics):
-        ParquetFLBA min()
-        ParquetFLBA max()
-
-    cdef cppclass CColumnCryptoMetaData" parquet::ColumnCryptoMetaData":
-        shared_ptr[ColumnPath] path_in_schema() const
-        c_bool encrypted_with_footer_key() const
-        const c_string& key_metadata() const
-
-    cdef cppclass ParquetIndexLocation" parquet::IndexLocation":
-        int64_t offset
-        int32_t length
-
-    cdef cppclass CColumnChunkMetaData" parquet::ColumnChunkMetaData":
-        int64_t file_offset() const
-        const c_string& file_path() const
-
-        c_bool is_metadata_set() const
-        ParquetType type() const
-        int64_t num_values() const
-        shared_ptr[ColumnPath] path_in_schema() const
-        bint is_stats_set() const
-        shared_ptr[CStatistics] statistics() const
-        c_bool is_geo_stats_set() const
-        shared_ptr[CParquetGeoStatistics] geo_statistics() const
-        ParquetCompression compression() const
-        const vector[ParquetEncoding]& encodings() const
-        c_bool Equals(const CColumnChunkMetaData&) const
-
-        int64_t has_dictionary_page() const
-        int64_t dictionary_page_offset() const
-        int64_t data_page_offset() const
-        int64_t index_page_offset() const
-        int64_t total_compressed_size() const
-        int64_t total_uncompressed_size() const
-        unique_ptr[CColumnCryptoMetaData] crypto_metadata() const
-        optional[ParquetIndexLocation] GetColumnIndexLocation() const
-        optional[ParquetIndexLocation] GetOffsetIndexLocation() const
-        shared_ptr[const CKeyValueMetadata] key_value_metadata() const
-
-    struct CSortingColumn" parquet::SortingColumn":
-        int column_idx
-        c_bool descending
-        c_bool nulls_first
-
-    cdef cppclass CRowGroupMetaData" parquet::RowGroupMetaData":
-        c_bool Equals(const CRowGroupMetaData&) const
-        int num_columns() const
-        int64_t num_rows() const
-        int64_t total_byte_size() const
-        vector[CSortingColumn] sorting_columns() const
-        unique_ptr[CColumnChunkMetaData] ColumnChunk(int i) const
-
-    cdef cppclass CFileMetaData" parquet::FileMetaData":
-        c_bool Equals(const CFileMetaData&) const
-        uint32_t size()
-        int num_columns()
-        int64_t num_rows()
-        int num_row_groups()
-        ParquetVersion version()
-        const c_string created_by()
-        int num_schema_elements()
-
-        void set_file_path(const c_string& path)
-        void AppendRowGroups(const CFileMetaData& other) except +
-
-        unique_ptr[CRowGroupMetaData] RowGroup(int i)
-        const SchemaDescriptor* schema()
-        shared_ptr[const CKeyValueMetadata] key_value_metadata() const
-        void WriteTo(COutputStream* dst) const
-
-        inline c_bool is_encryption_algorithm_set() const
-        inline EncryptionAlgorithm encryption_algorithm() const
-        inline const c_string& footer_signing_key_metadata() const
-
-    cdef shared_ptr[CFileMetaData] CFileMetaData_Make \
-        " parquet::FileMetaData::Make"(const void* serialized_metadata,
-                                       uint32_t* metadata_len)
-
-    cdef cppclass CReaderProperties" parquet::ReaderProperties":
-        c_bool is_buffered_stream_enabled() const
-        void enable_buffered_stream()
-        void disable_buffered_stream()
-
-        void set_buffer_size(int64_t buf_size)
-        int64_t buffer_size() const
-
-        void set_thrift_string_size_limit(int32_t size)
-        int32_t thrift_string_size_limit() const
-
-        void set_thrift_container_size_limit(int32_t size)
-        int32_t thrift_container_size_limit() const
-
-        void file_decryption_properties(shared_ptr[CFileDecryptionProperties]
-                                        decryption)
-        shared_ptr[CFileDecryptionProperties] file_decryption_properties() \
-            const
-
-        c_bool page_checksum_verification() const
-        void set_page_checksum_verification(c_bool check_crc)
-
-    CReaderProperties default_reader_properties()
-
-    cdef cppclass ArrowReaderProperties:
-        ArrowReaderProperties()
-        void set_read_dictionary(int column_index, c_bool read_dict)
-        c_bool read_dictionary(int column_index)
-        void set_batch_size(int64_t batch_size)
-        int64_t batch_size()
-        void set_pre_buffer(c_bool pre_buffer)
-        c_bool pre_buffer() const
-        void set_cache_options(CCacheOptions options)
-        CCacheOptions cache_options() const
-        void set_coerce_int96_timestamp_unit(TimeUnit unit)
-        TimeUnit coerce_int96_timestamp_unit() const
-        void set_arrow_extensions_enabled(c_bool extensions_enabled)
-        c_bool get_arrow_extensions_enabled() const
-
-    ArrowReaderProperties default_arrow_reader_properties()
-
-    cdef cppclass ParquetFileReader:
-        shared_ptr[CFileMetaData] metadata()
-
-
-cdef extern from "parquet/api/writer.h" namespace "parquet" nogil:
-    cdef cppclass CdcOptions:
-        int64_t min_chunk_size
-        int64_t max_chunk_size
-        int norm_level
-
-    cdef cppclass WriterProperties:
-        cppclass Builder:
-            Builder* data_page_version(ParquetDataPageVersion version)
-            Builder* version(ParquetVersion version)
-            Builder* compression(ParquetCompression codec)
-            Builder* compression(const c_string& path,
-                                 ParquetCompression codec)
-            Builder* compression_level(int compression_level)
-            Builder* compression_level(const c_string& path,
-                                       int compression_level)
-            Builder* encryption(
-                shared_ptr[CFileEncryptionProperties]
-                file_encryption_properties)
-            Builder* disable_dictionary()
-            Builder* enable_dictionary()
-            Builder* enable_dictionary(const c_string& path)
-            Builder* set_sorting_columns(vector[CSortingColumn] 
sorting_columns)
-            Builder* disable_statistics()
-            Builder* enable_statistics()
-            Builder* enable_statistics(const c_string& path)
-            Builder* enable_store_decimal_as_integer()
-            Builder* disable_store_decimal_as_integer()
-            Builder* data_pagesize(int64_t size)
-            Builder* encoding(ParquetEncoding encoding)
-            Builder* encoding(const c_string& path,
-                              ParquetEncoding encoding)
-            Builder* max_row_group_length(int64_t size)
-            Builder* write_batch_size(int64_t batch_size)
-            Builder* dictionary_pagesize_limit(int64_t 
dictionary_pagesize_limit)
-            Builder* enable_write_page_index()
-            Builder* disable_write_page_index()
-            Builder* enable_page_checksum()
-            Builder* disable_page_checksum()
-            Builder* enable_content_defined_chunking()
-            Builder* disable_content_defined_chunking()
-            Builder* content_defined_chunking_options(CdcOptions options)
-            shared_ptr[WriterProperties] build()
-
-    cdef cppclass ArrowWriterProperties:
-        cppclass Builder:
-            Builder()
-            Builder* disable_deprecated_int96_timestamps()
-            Builder* enable_deprecated_int96_timestamps()
-            Builder* coerce_timestamps(TimeUnit unit)
-            Builder* allow_truncated_timestamps()
-            Builder* disallow_truncated_timestamps()
-            Builder* store_schema()
-            Builder* enable_compliant_nested_types()
-            Builder* disable_compliant_nested_types()
-            Builder* set_engine_version(ArrowWriterEngineVersion version)
-            shared_ptr[ArrowWriterProperties] build()
-        c_bool support_deprecated_int96_timestamps()
-
-cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil:
-    cdef cppclass FileReader:
-        FileReader(CMemoryPool* pool, unique_ptr[ParquetFileReader] reader)
-
-        CStatus GetSchema(shared_ptr[CSchema]* out)
-
-        CStatus ReadColumn(int i, shared_ptr[CChunkedArray]* out)
-        CStatus ReadSchemaField(int i, shared_ptr[CChunkedArray]* out)
-
-        int num_row_groups()
-        CStatus ReadRowGroup(int i, shared_ptr[CTable]* out)
-        CStatus ReadRowGroup(int i, const vector[int]& column_indices,
-                             shared_ptr[CTable]* out)
-
-        CStatus ReadRowGroups(const vector[int]& row_groups,
-                              shared_ptr[CTable]* out)
-        CStatus ReadRowGroups(const vector[int]& row_groups,
-                              const vector[int]& column_indices,
-                              shared_ptr[CTable]* out)
-
-        CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const 
vector[int]& row_group_indices,
-                                                                     const 
vector[int]& column_indices)
-        CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const 
vector[int]& row_group_indices)
-
-        CStatus ReadTable(shared_ptr[CTable]* out)
-        CStatus ReadTable(const vector[int]& column_indices,
-                          shared_ptr[CTable]* out)
-
-        CStatus ScanContents(vector[int] columns, int32_t column_batch_size,
-                             int64_t* num_rows)
-
-        const ParquetFileReader* parquet_reader()
-
-        void set_use_threads(c_bool use_threads)
-
-        void set_batch_size(int64_t batch_size)
-
-    cdef cppclass FileReaderBuilder:
-        FileReaderBuilder()
-        CStatus Open(const shared_ptr[CRandomAccessFile]& file,
-                     const CReaderProperties& properties,
-                     const shared_ptr[CFileMetaData]& metadata)
-
-        ParquetFileReader* raw_reader()
-        FileReaderBuilder* memory_pool(CMemoryPool*)
-        FileReaderBuilder* properties(const ArrowReaderProperties&)
-        CStatus Build(unique_ptr[FileReader]* out)
-
-    CStatus FromParquetSchema(
-        const SchemaDescriptor* parquet_schema,
-        const ArrowReaderProperties& properties,
-        const shared_ptr[const CKeyValueMetadata]& key_value_metadata,
-        shared_ptr[CSchema]* out)
-
-    CStatus StatisticsAsScalars(const CStatistics& Statistics,
-                                shared_ptr[CScalar]* min,
-                                shared_ptr[CScalar]* max)
-
-cdef extern from "parquet/arrow/schema.h" namespace "parquet::arrow" nogil:
-
-    CStatus ToParquetSchema(
-        const CSchema* arrow_schema,
-        const WriterProperties& properties,
-        const ArrowWriterProperties& arrow_properties,
-        shared_ptr[SchemaDescriptor]* out)
-
-
-cdef extern from "parquet/properties.h" namespace "parquet" nogil:
-    cdef enum ArrowWriterEngineVersion:
-        V1 "parquet::ArrowWriterProperties::V1",
-        V2 "parquet::ArrowWriterProperties::V2"
-
-    cdef cppclass ParquetDataPageVersion:
-        pass
-
-    cdef ParquetDataPageVersion ParquetDataPageVersion_V1 \
-        " parquet::ParquetDataPageVersion::V1"
-    cdef ParquetDataPageVersion ParquetDataPageVersion_V2 \
-        " parquet::ParquetDataPageVersion::V2"
-
-cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
-    cdef cppclass FileWriter:
-
-        @staticmethod
-        CResult[unique_ptr[FileWriter]] Open(const CSchema& schema, 
CMemoryPool* pool,
-                                             const shared_ptr[COutputStream]& 
sink,
-                                             const 
shared_ptr[WriterProperties]& properties,
-                                             const 
shared_ptr[ArrowWriterProperties]& arrow_properties)
-
-        CStatus WriteTable(const CTable& table, int64_t chunk_size)
-        CStatus NewRowGroup()
-        CStatus Close()
-        CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& 
key_value_metadata)
-
-        const shared_ptr[CFileMetaData] metadata() const
-
-    CStatus WriteMetaDataFile(
-        const CFileMetaData& file_metadata,
-        const COutputStream* sink)
-
 cdef class FileEncryptionProperties:
     """File-level encryption properties for the low-level API"""
     cdef:
@@ -728,15 +130,6 @@ cdef class GeoStatistics(_Weakrefable):
         self.statistics = statistics
         self.parent = parent
 
-cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil:
-    cdef cppclass CFileDecryptionProperties\
-            " parquet::FileDecryptionProperties":
-        pass
-
-    cdef cppclass CFileEncryptionProperties\
-            " parquet::FileEncryptionProperties":
-        pass
-
 cdef class FileDecryptionProperties:
     """File-level decryption properties for the low-level API"""
     cdef:
diff --git a/python/pyarrow/_parquet.pxd 
b/python/pyarrow/includes/libparquet.pxd
similarity index 86%
copy from python/pyarrow/_parquet.pxd
copy to python/pyarrow/includes/libparquet.pxd
index 7095b35a2c..4851c191db 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/includes/libparquet.pxd
@@ -16,7 +16,6 @@
 # under the License.
 
 # distutils: language = c++
-# cython: language_level = 3
 
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport (CChunkedArray, CScalar, CSchema, 
CStatus,
@@ -24,7 +23,6 @@ from pyarrow.includes.libarrow cimport (CChunkedArray, 
CScalar, CSchema, CStatus
                                         CKeyValueMetadata, CRandomAccessFile,
                                         COutputStream, CCacheOptions,
                                         TimeUnit, CRecordBatchReader)
-from pyarrow.lib cimport _Weakrefable
 
 
 cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:
@@ -620,114 +618,6 @@ cdef extern from "parquet/arrow/writer.h" namespace 
"parquet::arrow" nogil:
         const CFileMetaData& file_metadata,
         const COutputStream* sink)
 
-cdef class FileEncryptionProperties:
-    """File-level encryption properties for the low-level API"""
-    cdef:
-        shared_ptr[CFileEncryptionProperties] properties
-
-    @staticmethod
-    cdef inline FileEncryptionProperties wrap(
-            shared_ptr[CFileEncryptionProperties] properties):
-
-        result = FileEncryptionProperties()
-        result.properties = properties
-        return result
-
-    cdef inline shared_ptr[CFileEncryptionProperties] unwrap(self):
-        return self.properties
-
-cdef shared_ptr[WriterProperties] _create_writer_properties(
-    use_dictionary=*,
-    compression=*,
-    version=*,
-    write_statistics=*,
-    data_page_size=*,
-    compression_level=*,
-    use_byte_stream_split=*,
-    column_encoding=*,
-    data_page_version=*,
-    FileEncryptionProperties encryption_properties=*,
-    write_batch_size=*,
-    dictionary_pagesize_limit=*,
-    write_page_index=*,
-    write_page_checksum=*,
-    sorting_columns=*,
-    store_decimal_as_integer=*,
-    use_content_defined_chunking=*
-) except *
-
-
-cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
-    use_deprecated_int96_timestamps=*,
-    coerce_timestamps=*,
-    allow_truncated_timestamps=*,
-    writer_engine_version=*,
-    use_compliant_nested_type=*,
-    store_schema=*,
-) except *
-
-cdef class ParquetSchema(_Weakrefable):
-    cdef:
-        FileMetaData parent  # the FileMetaData owning the SchemaDescriptor
-        const SchemaDescriptor* schema
-
-cdef class FileMetaData(_Weakrefable):
-    cdef:
-        shared_ptr[CFileMetaData] sp_metadata
-        CFileMetaData* _metadata
-        ParquetSchema _schema
-
-    cdef inline init(self, const shared_ptr[CFileMetaData]& metadata):
-        self.sp_metadata = metadata
-        self._metadata = metadata.get()
-
-cdef class RowGroupMetaData(_Weakrefable):
-    cdef:
-        int index  # for pickling support
-        unique_ptr[CRowGroupMetaData] up_metadata
-        CRowGroupMetaData* metadata
-        FileMetaData parent
-
-    cdef inline init(self, FileMetaData parent, int index):
-        if index < 0 or index >= parent.num_row_groups:
-            raise IndexError('{0} out of bounds'.format(index))
-        self.up_metadata = parent._metadata.RowGroup(index)
-        self.metadata = self.up_metadata.get()
-        self.parent = parent
-        self.index = index
-
-
-cdef class ColumnChunkMetaData(_Weakrefable):
-    cdef:
-        unique_ptr[CColumnChunkMetaData] up_metadata
-        CColumnChunkMetaData* metadata
-        RowGroupMetaData parent
-
-    cdef inline init(self, RowGroupMetaData parent, int i):
-        self.up_metadata = parent.metadata.ColumnChunk(i)
-        self.metadata = self.up_metadata.get()
-        self.parent = parent
-
-cdef class Statistics(_Weakrefable):
-    cdef:
-        shared_ptr[CStatistics] statistics
-        ColumnChunkMetaData parent
-
-    cdef inline init(self, const shared_ptr[CStatistics]& statistics,
-                     ColumnChunkMetaData parent):
-        self.statistics = statistics
-        self.parent = parent
-
-cdef class GeoStatistics(_Weakrefable):
-    cdef:
-        shared_ptr[CParquetGeoStatistics] statistics
-        ColumnChunkMetaData parent
-
-    cdef inline init(self, const shared_ptr[CParquetGeoStatistics]& statistics,
-                     ColumnChunkMetaData parent):
-        self.statistics = statistics
-        self.parent = parent
-
 cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil:
     cdef cppclass CFileDecryptionProperties\
             " parquet::FileDecryptionProperties":
@@ -736,19 +626,3 @@ cdef extern from "parquet/encryption/encryption.h" 
namespace "parquet" nogil:
     cdef cppclass CFileEncryptionProperties\
             " parquet::FileEncryptionProperties":
         pass
-
-cdef class FileDecryptionProperties:
-    """File-level decryption properties for the low-level API"""
-    cdef:
-        shared_ptr[CFileDecryptionProperties] properties
-
-    @staticmethod
-    cdef inline FileDecryptionProperties wrap(
-            shared_ptr[CFileDecryptionProperties] properties):
-
-        result = FileDecryptionProperties()
-        result.properties = properties
-        return result
-
-    cdef inline shared_ptr[CFileDecryptionProperties] unwrap(self):
-        return self.properties

Reply via email to