morningman commented on code in PR #28527: URL: https://github.com/apache/doris/pull/28527#discussion_r1438841331
########## be/src/vec/exec/format/parquet/parquet_bloom_reader.h: ########## @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "io/fs/file_reader.h" + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris { +namespace io { +class FileSystem; +struct IOContext; +} // namespace io + +namespace vectorized { +class FileMetaData; +class VExprContext; +class BloomFilter; +} // namespace vectorized +struct TypeDescriptor; +} // namespace doris + +namespace doris::vectorized { + +class RowGroupBloomFilterReader { +public: + virtual ~RowGroupBloomFilterReader() = default; + + virtual std::unique_ptr<BloomFilter> GetColumnBloomFilter(int i) = 0; +}; + +class BloomFilterReader { +public: + virtual ~BloomFilterReader() = default; + + /// \brief Create a BloomFilterReader instance. + /// \returns a BloomFilterReader instance. + /// WARNING: The returned BloomFilterReader references to all the input parameters, so + /// it must not outlive all of the input parameters. Usually these input parameters + /// come from the same ParquetFileReader object, so it must not outlive the reader + /// that creates this BloomFilterReader. 
+ static std::unique_ptr<BloomFilterReader> Make(io::FileReaderSPtr file_reader, Review Comment: ```suggestion static std::unique_ptr<BloomFilterReader> make(io::FileReaderSPtr file_reader, ``` ########## be/src/vec/exec/format/parquet/parquet_bloom.h: ########## @@ -0,0 +1,356 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <io/fs/file_reader.h> +#include <parquet/hasher.h> +#include <parquet/types.h> + +namespace doris::vectorized { +class BloomFilter { +public: + // Maximum Bloom filter size, it sets to HDFS default block size 128MB + // This value will be reconsidered when implementing Bloom filter producer. + static constexpr uint32_t kMaximumBloomFilterBytes = 128 * 1024 * 1024; + + /// Determine whether an element exist in set or not. + /// + /// @param hash the element to contain. + /// @return false if value is definitely not in set, and true means PROBABLY + /// in set. 
+ virtual bool FindHash(uint64_t hash) const = 0; Review Comment: ```suggestion virtual bool find_hash(uint64_t hash) const = 0; ``` The same applies to the other methods' names. ########## be/src/vec/exec/format/parquet/parquet_bloom_reader.h: ########## @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "io/fs/file_reader.h" + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris { +namespace io { +class FileSystem; +struct IOContext; +} // namespace io + +namespace vectorized { +class FileMetaData; +class VExprContext; +class BloomFilter; +} // namespace vectorized +struct TypeDescriptor; +} // namespace doris + +namespace doris::vectorized { + +class RowGroupBloomFilterReader { +public: + virtual ~RowGroupBloomFilterReader() = default; + + virtual std::unique_ptr<BloomFilter> GetColumnBloomFilter(int i) = 0; +}; + +class BloomFilterReader { +public: + virtual ~BloomFilterReader() = default; + + /// \brief Create a BloomFilterReader instance. + /// \returns a BloomFilterReader instance. + /// WARNING: The returned BloomFilterReader references to all the input parameters, so + /// it must not outlive all of the input parameters. 
Usually these input parameters + /// come from the same ParquetFileReader object, so it must not outlive the reader + /// that creates this BloomFilterReader. + static std::unique_ptr<BloomFilterReader> Make(io::FileReaderSPtr file_reader, + const tparquet::FileMetaData* file_metadata, + io::IOContext* io_context); + + /// \brief Get the bloom filter reader of a specific row group. + /// \param[in] i row group ordinal to get bloom filter reader. + /// \returns RowGroupBloomFilterReader of the specified row group. A nullptr may or may + /// not be returned if the bloom filter for the row group is unavailable. It + /// is the caller's responsibility to check the return value of follow-up calls + /// to the RowGroupBloomFilterReader. + /// \throws ParquetException if the index is out of bound. + virtual std::shared_ptr<RowGroupBloomFilterReader> RowGroup(int i) = 0; + virtual std::shared_ptr<RowGroupBloomFilterReader> RowGroup( Review Comment: ```suggestion virtual std::shared_ptr<RowGroupBloomFilterReader> row_group( ``` ########## be/src/vec/exec/format/parquet/parquet_bloom_reader.h: ########## @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "io/fs/file_reader.h" + +namespace cctz { +class time_zone; +} // namespace cctz + +namespace doris { +namespace io { +class FileSystem; +struct IOContext; +} // namespace io + +namespace vectorized { +class FileMetaData; +class VExprContext; +class BloomFilter; +} // namespace vectorized +struct TypeDescriptor; +} // namespace doris + +namespace doris::vectorized { + +class RowGroupBloomFilterReader { +public: + virtual ~RowGroupBloomFilterReader() = default; + + virtual std::unique_ptr<BloomFilter> GetColumnBloomFilter(int i) = 0; +}; + +class BloomFilterReader { +public: + virtual ~BloomFilterReader() = default; + + /// \brief Create a BloomFilterReader instance. + /// \returns a BloomFilterReader instance. + /// WARNING: The returned BloomFilterReader references to all the input parameters, so + /// it must not outlive all of the input parameters. Usually these input parameters + /// come from the same ParquetFileReader object, so it must not outlive the reader + /// that creates this BloomFilterReader. + static std::unique_ptr<BloomFilterReader> Make(io::FileReaderSPtr file_reader, + const tparquet::FileMetaData* file_metadata, + io::IOContext* io_context); + + /// \brief Get the bloom filter reader of a specific row group. + /// \param[in] i row group ordinal to get bloom filter reader. + /// \returns RowGroupBloomFilterReader of the specified row group. A nullptr may or may + /// not be returned if the bloom filter for the row group is unavailable. It + /// is the caller's responsibility to check the return value of follow-up calls + /// to the RowGroupBloomFilterReader. + /// \throws ParquetException if the index is out of bound. + virtual std::shared_ptr<RowGroupBloomFilterReader> RowGroup(int i) = 0; Review Comment: ```suggestion virtual std::shared_ptr<RowGroupBloomFilterReader> row_group(int i) = 0; ``` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
