emkornfield commented on code in PR #37400: URL: https://github.com/apache/arrow/pull/37400#discussion_r1342149379
########## cpp/src/parquet/metadata.h: ########## @@ -508,21 +507,32 @@ class PARQUET_EXPORT RowGroupMetaDataBuilder { std::unique_ptr<RowGroupMetaDataBuilderImpl> impl_; }; +/// Alias type of page index and bloom filter location of a row group. The index +/// location is located by column ordinal. If the column does not have the index, +/// its value is set to std::nullopt. +using RowGroupIndexLocation = std::vector<std::optional<IndexLocation>>; + +/// Alias type of page index and bloom filter location of a parquet file. The +/// index location is located by the row group ordinal. +using FileIndexLocation = std::map<size_t, RowGroupIndexLocation>; + /// \brief Public struct for location to all page indexes in a parquet file. struct PageIndexLocation { - /// Alias type of page index location of a row group. The index location - /// is located by column ordinal. If the column does not have the page index, - /// its value is set to std::nullopt. - using RowGroupIndexLocation = std::vector<std::optional<IndexLocation>>; - /// Alias type of page index location of a parquet file. The index location - /// is located by the row group ordinal. - using FileIndexLocation = std::map<size_t, RowGroupIndexLocation>; /// Row group column index locations which uses row group ordinal as the key. FileIndexLocation column_index_location; /// Row group offset index locations which uses row group ordinal as the key. FileIndexLocation offset_index_location; }; +/// \brief Public struct for location to all bloom filters in a parquet file. +struct BloomFilterLocation { + /// Row group bloom filter index locations which uses row group ordinal as the key. + /// + /// Note: Before Parquet 2.10, the bloom filter index only have "offset". But here + /// we use "IndexLocation" with length to support the future extension. + FileIndexLocation bloom_filter_location; Review Comment: instead of moving these to a top level just to use in the BloomFilterLocation, I think we should make the BloomFilterDataLocation or something similar its own type to allow flexibility and make naming clearer. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org