This is an automated email from the ASF dual-hosted git repository. blue pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/parquet-format.git
The following commit(s) were added to refs/heads/master by this push:
     new 556ebee  PARQUET-1630: add empty compression union for Bloom filter (#149)
556ebee is described below

commit 556ebee2107e4223aad40573e27e9f62075dddd7
Author: Jim Apple <jbap...@apache.org>
AuthorDate: Tue Aug 13 08:49:12 2019 -0700

    PARQUET-1630: add empty compression union for Bloom filter (#149)

    Right now no compression methods are supported.

    For more on Bloom filter compression, see Michael Mitzenmacher's
    "Compressed Bloom Filters",
    https://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/cbf2.pdf
---
 BloomFilter.md                 | 10 ++++++++++
 src/main/thrift/parquet.thrift | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/BloomFilter.md b/BloomFilter.md
index 8ce22ae..e5ec30c 100644
--- a/BloomFilter.md
+++ b/BloomFilter.md
@@ -264,6 +264,14 @@ union BloomFilterHash {
 }
 
 /**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+  1: Uncompressed UNCOMPRESSED;
+}
+
+/**
  * Bloom filter header is stored at beginning of Bloom filter data of each column
  * and followed by its bitset.
  **/
@@ -274,6 +282,8 @@ struct BloomFilterPageHeader {
   2: required BloomFilterAlgorithm algorithm;
   /** The hash function used for Bloom filter. **/
   3: required BloomFilterHash hash;
+  /** The compression used in the Bloom filter **/
+  4: required BloomFilterCompression compression;
 }
 
 struct ColumnMetaData {
diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift
index da90acd..a062b4f 100644
--- a/src/main/thrift/parquet.thrift
+++ b/src/main/thrift/parquet.thrift
@@ -582,6 +582,15 @@ union BloomFilterHash {
   /** xxHash Strategy. **/
   1: XxHash XXHASH;
 }
+
+/**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+  1: Uncompressed UNCOMPRESSED;
+}
+
 /**
  * Bloom filter header is stored at beginning of Bloom filter data of each column
  * and followed by its bitset.
@@ -593,6 +602,8 @@ struct BloomFilterHeader {
   2: required BloomFilterAlgorithm algorithm;
   /** The hash function used for Bloom filter. **/
   3: required BloomFilterHash hash;
+  /** The compression used in the Bloom filter **/
+  4: required BloomFilterCompression compression;
 }
 
 struct PageHeader {