This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 60ea656 util: helper class for working with bitsets
60ea656 is described below
commit 60ea656b314a0daec41a0e14fd25ff2c55c3f4b5
Author: Andrew Wong <[email protected]>
AuthorDate: Mon Mar 25 19:00:21 2019 -0700
util: helper class for working with bitsets
Adds a templatized helper class to facilitate working with bitsets. This
is just a wrapper around std::bitset, but it exposes a more
container-like interface. This is particularly useful for specifying
containers of enum types and the like (e.g. rather than using a hashed
container).
This also adds an iterator class for the new wrapper; such an iterator
apparently doesn't exist[1] in the STL bitset class.
[1] https://stackoverflow.com/a/34728458
Change-Id: Ie0344fe94e9f9da9651164cb1b456c92d99dbdf4
Reviewed-on: http://gerrit.cloudera.org:8080/12866
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/util/CMakeLists.txt | 2 +-
src/kudu/util/bitset-test.cc | 170 ++++++++++++++++++++++++++++++++++++
src/kudu/util/bitset.h | 202 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 373 insertions(+), 1 deletion(-)
diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt
index d08e0ee..312739e 100644
--- a/src/kudu/util/CMakeLists.txt
+++ b/src/kudu/util/CMakeLists.txt
@@ -149,7 +149,6 @@ set(UTIL_SRCS
bitmap.cc
block_cache_metrics.cc
bloom_filter.cc
- bitmap.cc
cache.cc
coding.cc
condition_variable.cc
@@ -394,6 +393,7 @@ ADD_KUDU_TEST(async_util-test)
ADD_KUDU_TEST(atomic-test)
ADD_KUDU_TEST(bit-util-test)
ADD_KUDU_TEST(bitmap-test)
+ADD_KUDU_TEST(bitset-test)
ADD_KUDU_TEST(blocking_queue-test)
ADD_KUDU_TEST(bloom_filter-test)
ADD_KUDU_TEST(cache-bench RUN_SERIAL true)
diff --git a/src/kudu/util/bitset-test.cc b/src/kudu/util/bitset-test.cc
new file mode 100644
index 0000000..b12fd16
--- /dev/null
+++ b/src/kudu/util/bitset-test.cc
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/util/bitset.h"
+
+#include <cstddef>
+#include <set>
+#include <string>
+#include <utility>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/map-util.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/test_macros.h"
+
+using std::set;
+using std::string;
+using strings::Substitute;
+
+namespace {
+
+enum TestEnum {
+ BEGINNING,
+ MIDDLE,
+ END,
+};
+constexpr size_t kMaxEnumVal = TestEnum::END + 1;
+
+typedef set<TestEnum> EnumSet;
+typedef FixedBitSet<TestEnum, kMaxEnumVal> EnumBitSet;
+
+const EnumSet kFullEnumSet({
+ TestEnum::BEGINNING,
+ TestEnum::MIDDLE,
+ TestEnum::END,
+});
+
+bool CompareContainers(EnumSet enum_set, EnumBitSet ebs) {
+ if (enum_set.empty() != ebs.empty()) {
+ LOG(INFO) << Substitute("enum set is $0, bitset is $1",
+ enum_set.empty() ? "empty" : "not empty",
+ ebs.empty() ? "empty" : "not empty");
+ return false;
+ }
+ if (enum_set.size() != ebs.size()) {
+ LOG(INFO) << Substitute("enum set has $0 elements, bitset has $1",
+ enum_set.size(), ebs.size());
+ return false;
+ }
+ for (const auto& e : enum_set) {
+ if (!ContainsKey(ebs, e)) {
+ LOG(INFO) << Substitute("enum set contains $0, not found in bitset", e);
+ return false;
+ }
+ }
+ for (TestEnum e : ebs) {
+ if (!ContainsKey(enum_set, e)) {
+ LOG(INFO) << Substitute("bitset contains $0, not found in enum set", e);
+ return false;
+ }
+ }
+ return true;
+}
+
+} // anonymous namespace
+
+TEST(BitSetTest, TestConstruction) {
+ EnumSet enum_set;
+ for (int i = 0; i < kMaxEnumVal; i++) {
+ InsertOrDie(&enum_set, static_cast<TestEnum>(i));
+ EnumBitSet ebs(enum_set);
+ ASSERT_TRUE(CompareContainers(enum_set, ebs));
+ }
+}
+
+TEST(BitSetTest, TestInitializerList) {
+ EnumBitSet ebs({ TestEnum::BEGINNING });
+ ASSERT_TRUE(ContainsKey(ebs, TestEnum::BEGINNING));
+ ASSERT_EQ(1, ebs.size());
+}
+
+// Test basic operations for a bitset of enums, comparing is to an STL
+// container of enums.
+TEST(BitSetTest, TestBasicOperations) {
+ EnumBitSet bitset;
+ ASSERT_TRUE(bitset.empty());
+
+ EnumSet enum_set;
+ const auto add_to_containers = [&] (TestEnum e) {
+ ASSERT_EQ(InsertIfNotPresent(&enum_set, e),
+ InsertIfNotPresent(&bitset, e));
+ };
+ const auto remove_from_containers = [&] (TestEnum e) {
+ ASSERT_EQ(enum_set.erase(e), bitset.erase(e));
+ };
+ // Insert all elements, checking to make sure our containers' contents remain
+ // the same.
+ for (const auto& e : kFullEnumSet) {
+ NO_FATALS(add_to_containers(e));
+ ASSERT_TRUE(CompareContainers(enum_set, bitset));
+ }
+
+ // Do a sanity check that we can't insert something that already exists in
+ // the set.
+ ASSERT_FALSE(InsertIfNotPresent(&bitset, TestEnum::BEGINNING));
+
+ // Now remove all elements.
+ for (const auto& e : kFullEnumSet) {
+ NO_FATALS(remove_from_containers(e));
+ ASSERT_TRUE(CompareContainers(enum_set, bitset));
+ }
+
+ // Do a final sanity check that the bitset looks how we expect.
+ ASSERT_TRUE(CompareContainers(enum_set, bitset));
+ ASSERT_TRUE(bitset.empty());
+}
+
+// Test the set's insert interface.
+TEST(BitSetTest, TestInsert) {
+ EnumBitSet bitset;
+ {
+ auto iter_and_inserted = bitset.insert(TestEnum::BEGINNING);
+ ASSERT_EQ(TestEnum::BEGINNING, *iter_and_inserted.first);
+ ASSERT_TRUE(iter_and_inserted.second);
+ }
+ {
+ auto iter_and_inserted = bitset.insert(TestEnum::BEGINNING);
+ ASSERT_EQ(TestEnum::BEGINNING, *iter_and_inserted.first);
+ ASSERT_FALSE(iter_and_inserted.second);
+ }
+}
+
+#ifndef NDEBUG
+// Make sure we hit a check failure if we attempt to use the bitset with values
+// outside of its range.
+TEST(BitSetDeathTest, TestInvalidUsage) {
+ const string kDeathMessage = "Check failed";
+ const TestEnum kOutOfRange = TestEnum(kMaxEnumVal);
+ EnumBitSet bitset;
+ EXPECT_DEATH({
+ bitset.insert(kOutOfRange);
+ }, kDeathMessage);
+
+ EXPECT_DEATH({
+ bitset.erase(kOutOfRange);
+ }, kDeathMessage);
+
+ EXPECT_DEATH({
+ bitset.contains(kOutOfRange);
+ }, kDeathMessage);
+
+ ASSERT_TRUE(bitset.empty());
+}
+#endif // NDEBUG
diff --git a/src/kudu/util/bitset.h b/src/kudu/util/bitset.h
new file mode 100644
index 0000000..61a6274
--- /dev/null
+++ b/src/kudu/util/bitset.h
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <bitset>
+#include <cstddef>
+#include <initializer_list>
+#include <iterator>
+#include <utility>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/macros.h"
+
+// Utility template for working with a bitset to make it feel more like a
+// container. E.g., this is useful for building containers for enum types,
+// instead of using a hashed container.
+//
+// This supports operating on 'IntType' types with values ranging from [0,
+// MaxVals). The underlying bitset is of size 'MaxVals', which must be known at
+// compile time.
+//
+// Under the hood, std::bitset uses word-sized bitwise instructions on
+// stack-allocated bytes, so the expectation is that 'MaxVals' is not many
+// times larger than the word-size. A size limit of 64 bits is enforced at
+// compile time.
+template <typename IntType, size_t MaxVals>
+class FixedBitSet {
+ public:
+ // These types are exposed to match those provided by STL containers, which
+ // allows template instantiations to be used by map utility functions.
+ class iterator;
+ typedef IntType key_type;
+ typedef IntType value_type;
+
+ // Constructs an empty FixedBitSet.
+ FixedBitSet() {}
+
+ // Constructs a new FixedBitSet from an initializer list.
+ FixedBitSet(std::initializer_list<IntType> list) {
+ for (const IntType& val : list) {
+ insert(val);
+ }
+ }
+
+ // Constructs a new FixedBitSet from a container.
+ template <typename Container>
+ explicit FixedBitSet(const Container& c) {
+ for (const IntType& val : c) {
+ insert(val);
+ }
+ }
+
+ // Inserts 'val' into the set.
+ std::pair<iterator, bool> insert(const IntType& val) {
+ DCHECK_LT(val, MaxVals);
+ bool not_present = !contains(val);
+ if (not_present) {
+ bitset_.set(static_cast<size_t>(val));
+ }
+ return { iterator(this, static_cast<int>(val)), not_present };
+ }
+
+ // Removes 'val' from the set if it exists.
+ size_t erase(const IntType val) {
+ DCHECK_LT(val, MaxVals);
+ bool not_present = !contains(val);
+ if (not_present) {
+ return 0;
+ }
+ bitset_.set(static_cast<size_t>(val), false);
+ return 1;
+ }
+
+ // Returns whether 'val' exists in the set.
+ bool contains(const IntType& val) const {
+ DCHECK_LT(val, MaxVals);
+ return bitset_.test(val);
+ }
+
+ // Returns whether the set is empty.
+ bool empty() const {
+ return bitset_.none();
+ }
+
+ // Clears the contents of the set.
+ void clear() {
+ bitset_.reset();
+ }
+
+ // Returns the number of set bits.
+ size_t size() const {
+ return bitset_.count();
+ }
+
+ // Resets the set to have the contents of the container of int-typed values.
+ template <typename C>
+ void reset(const C& container) {
+ clear();
+ for (const IntType& item : container) {
+ insert(item);
+ }
+ }
+
+ // Forward iterator that points to the start of the values in the bitset.
+ // Points at the first set bit, or at end() if no bits are set.
+ iterator begin() const {
+ iterator iter(this, -1);
+ iter.seek_forward();
+ return iter;
+ }
+
+ // Forward iterator that points to the end of the values in the bitset.
+ iterator end() const {
+ return iterator(this, MaxVals);
+ }
+
+ // Forward iterator that points at the element 'val' if it exists, or at
+ // end() if it doesn't exist.
+ iterator find(const IntType& val) const {
+ return contains(val) ? iterator(this, val) : end();
+ }
+
+ private:
+ COMPILE_ASSERT(MaxVals < 64, bitset_size_too_large);
+ std::bitset<MaxVals> bitset_;
+};
+
+// Forward iterator class for a FixedBitSet.
+template <typename IntType, size_t MaxVals>
+class FixedBitSet<IntType, MaxVals>::iterator :
+ public std::iterator<std::forward_iterator_tag, IntType> {
+ public:
+ // Returns the value currently pointed at by this iterator.
+ IntType operator*() {
+ return static_cast<IntType>(idx_);
+ }
+
+ // Prefix increment operator. Advances the iterator to the next position and
+ // returns it.
+ iterator& operator++() {
+ seek_forward();
+ return *this;
+ }
+
+ // Postfix increment operator. Advances the iterator to the next position,
+ // returning a non-iterated iterator.
+ iterator operator++(int) {
+ iterator iter_copy = *this;
+ seek_forward();
+ return iter_copy;
+ }
+
+ // Returns whether this iterator is the same as 'other'.
+ bool operator==(const iterator& other) const {
+ return (idx_ == other.idx_) && (fbs_ == other.fbs_);
+ }
+
+ // Returns whether this iterator is not the same as 'other'.
+ bool operator!=(const iterator& other) const {
+ return !(*this == other);
+ }
+
+ private:
+ friend class FixedBitSet<IntType, MaxVals>;
+ iterator(const FixedBitSet<IntType, MaxVals>* fbs, int idx)
+ : fbs_(fbs),
+ idx_(idx) {}
+
+ // Seeks forward to the next set bit, or until at the end of the iterator.
+ void seek_forward() {
+ if (idx_ == MaxVals) {
+ return;
+ }
+ while (++idx_ < MaxVals) {
+ if (fbs_->contains(static_cast<IntType>(idx_))) {
+ break;
+ }
+ }
+ }
+
+ // The underlying FixedBitSet that this iterator is iterating over.
+ const FixedBitSet<IntType, MaxVals>* const fbs_;
+
+ // This iterator's current position.
+ int idx_;
+};
+