This is an automated email from the ASF dual-hosted git repository. adar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 4fc493da48358386b9b4855e5598ff867c696ab2 Author: Andrew Wong <[email protected]> AuthorDate: Tue Apr 2 19:43:35 2019 -0700 util: pull Random methods out from tests I've found a couple of Random utility methods pretty useful in some tests I'm writing, so I'm pulling them out into random_util. Change-Id: Ie74eca50d770b9470f6cfb400cf632381a20bd93 Reviewed-on: http://gerrit.cloudera.org:8080/12918 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins --- src/kudu/master/placement_policy.cc | 3 +- .../tserver/tablet_server_authorization-test.cc | 45 ++++++---------- src/kudu/util/random-test.cc | 9 ++-- src/kudu/util/random.h | 41 --------------- src/kudu/util/random_util.h | 60 ++++++++++++++++++++-- 5 files changed, 80 insertions(+), 78 deletions(-) diff --git a/src/kudu/master/placement_policy.cc b/src/kudu/master/placement_policy.cc index 5b0b0d6..6219597 100644 --- a/src/kudu/master/placement_policy.cc +++ b/src/kudu/master/placement_policy.cc @@ -36,6 +36,7 @@ #include "kudu/gutil/strings/substitute.h" #include "kudu/master/ts_descriptor.h" #include "kudu/util/random.h" +#include "kudu/util/random_util.h" using std::multimap; using std::numeric_limits; @@ -280,7 +281,7 @@ shared_ptr<TSDescriptor> PlacementPolicy::SelectReplica( // If we've only got one server left, 'two_choices' will actually // just contain one element. vector<shared_ptr<TSDescriptor>> two_choices; - rng_->ReservoirSample(ts_descs, 2, excluded, &two_choices); + ReservoirSample(ts_descs, 2, excluded, rng_, &two_choices); DCHECK_LE(two_choices.size(), 2); if (two_choices.size() == 2) { diff --git a/src/kudu/tserver/tablet_server_authorization-test.cc b/src/kudu/tserver/tablet_server_authorization-test.cc index 56d6b25..9209ac1 100644 --- a/src/kudu/tserver/tablet_server_authorization-test.cc +++ b/src/kudu/tserver/tablet_server_authorization-test.cc @@ -68,6 +68,7 @@ #include "kudu/util/monotime.h" #include "kudu/util/pb_util.h" #include "kudu/util/random.h" +#include "kudu/util/random_util.h" #include "kudu/util/slice.h" #include "kudu/util/status.h" #include "kudu/util/test_macros.h" @@ -221,26 +222,6 @@ class AuthzTabletServerTestBase : public TabletServerTestBase { protected: - // Utility to select an element from a container at random. - template <typename Container, typename T> - T SelectAtRandom(const Container& c) const { - CHECK(!c.empty()); - vector<T> rand_list; - prng_.ReservoirSample(c, 1, set<T>{}, &rand_list); - CHECK_EQ(1, rand_list.size()); - return rand_list[0]; - } - - // Returns a randomly-sized randomized list of elements from 'full_set'. - template <typename T> - vector<T> RandomFromList(const vector<T>& full_list, int min_to_return) const { - CHECK_GT(full_list.size(), min_to_return); - int num_to_return = min_to_return + rand() % (full_list.size() - min_to_return); - vector<T> rand_list; - prng_.ReservoirSample(full_list, num_to_return, set<T>{}, &rand_list); - return rand_list; - } - // Signer used to create authz tokens. unique_ptr<TokenSigner> signer_; @@ -594,7 +575,8 @@ class ScanPrivilegeAuthzTest : public AuthzTabletServerTestBase, // Determine which column to sabotage if needed. boost::optional<string> misnamed_col; if (special_col == SpecialColumn::MISNAMED) { - misnamed_col = SelectAtRandom<unordered_set<string>, string>(scan.projected_cols); + misnamed_col = SelectRandomElement<unordered_set<string>, string, Random>( + scan.projected_cols, &prng_); } for (const auto& col_name : scan.projected_cols) { int col_idx = schema_.find_column(col_name); @@ -635,7 +617,7 @@ class ScanPrivilegeAuthzTest : public AuthzTabletServerTestBase, // Determine which column to sabotage if needed. boost::optional<string> misnamed_col; if (special_col == SpecialColumn::MISNAMED) { - misnamed_col = SelectAtRandom<unordered_set<string>, string>(cols); + misnamed_col = SelectRandomElement<unordered_set<string>, string, Random>(cols, &prng_); } for (const auto& col_name : cols) { int col_idx = client_schema.find_column(col_name); @@ -712,7 +694,8 @@ class ScanPrivilegeAuthzTest : public AuthzTabletServerTestBase, // Returns a randomly selected set of column names, of at least size // 'min_returned'. unordered_set<string> RandomColumnNames(int min_returned = 0) const { - vector<string> rand_privileges = RandomFromList(col_names_, min_returned); + vector<string> rand_privileges = SelectRandomSubset<vector<string>, string, Random>( + col_names_, min_returned, &prng_); unordered_set<string> rand_set(rand_privileges.begin(), rand_privileges.end()); return rand_set; } @@ -1178,8 +1161,8 @@ class WritePrivilegeAuthzTest : public AuthzTabletServerTestBase { WritePrivileges privileges = RandomWritePrivileges(); ASSERT_FALSE(required_privileges.empty()); if (!privileges.empty()) { - const auto& priv_to_remove = SelectAtRandom<WritePrivileges, WritePrivilegeType>( - required_privileges); + const auto& priv_to_remove = SelectRandomElement<WritePrivileges, WritePrivilegeType, Random>( + required_privileges, &prng_); LOG(INFO) << "Removing write privilege: " << WritePrivilegeToString(priv_to_remove); privileges.erase(priv_to_remove); } @@ -1210,7 +1193,9 @@ class WritePrivilegeAuthzTest : public AuthzTabletServerTestBase { RowOperationsPB::UPSERT, }; RowOpTypes types; - types.reset(RandomFromList(write_op_types, /*min_to_return=*/1)); + types.reset(SelectRandomSubset< + vector<RowOperationsPB::Type>, RowOperationsPB::Type, Random>( + write_op_types, /*min_to_return=*/1, &prng_)); return types; } @@ -1222,8 +1207,9 @@ class WritePrivilegeAuthzTest : public AuthzTabletServerTestBase { WritePrivilegeType::INSERT, WritePrivilegeType::UPDATE, }; - vector<WritePrivilegeType> rand_types = RandomFromList(write_privilege_types, - /*min_to_return=*/0); + vector<WritePrivilegeType> rand_types = + SelectRandomSubset<vector<WritePrivilegeType>, WritePrivilegeType, Random>( + write_privilege_types, /*min_to_return=*/0, &prng_); WritePrivileges privileges; privileges.reset(rand_types); return privileges; @@ -1281,7 +1267,8 @@ TEST_F(WritePrivilegeAuthzTest, TestWritesRandomized) { vector<WriteOpDescriptor> batch; WritePrivileges required_privileges; for (int i = 0; i < kNumOps; i++) { - const auto op_type = SelectAtRandom<RowOpTypes, RowOperationsPB::Type>(op_types); + const auto op_type = SelectRandomElement<RowOpTypes, RowOperationsPB::Type, Random>( + op_types, &prng_); batch.emplace_back(WriteOpDescriptor({ .op_type = op_type, .key = rand(), diff --git a/src/kudu/util/random-test.cc b/src/kudu/util/random-test.cc index 26c7ab5..db59221 100644 --- a/src/kudu/util/random-test.cc +++ b/src/kudu/util/random-test.cc @@ -24,6 +24,7 @@ #include <gtest/gtest.h> #include "kudu/util/random.h" +#include "kudu/util/random_util.h" #include "kudu/util/test_util.h" using std::numeric_limits; @@ -119,7 +120,7 @@ TEST_F(RandomTest, TestReservoirSample) { vector<int> counts(population.size()); unordered_set<int> avoid; for (int trial = 0; trial < 1000; trial++) { - rng_.ReservoirSample(population, 5, avoid, &results); + ReservoirSample(population, 5, avoid, &rng_, &results); for (int result : results) { counts[result]++; } @@ -139,7 +140,7 @@ TEST_F(RandomTest, TestReservoirSample) { avoid.insert(20); counts.assign(100, 0); for (int trial = 0; trial < 1000; trial++) { - rng_.ReservoirSample(population, 5, avoid, &results); + ReservoirSample(population, 5, avoid, &rng_, &results); for (int result : results) { counts[result]++; } @@ -159,11 +160,11 @@ TEST_F(RandomTest, TestReservoirSamplePopulationTooSmall) { vector<int> results; unordered_set<int> avoid; - rng_.ReservoirSample(population, 20, avoid, &results); + ReservoirSample(population, 20, avoid, &rng_, &results); ASSERT_EQ(population.size(), results.size()); ASSERT_EQ(population, results); - rng_.ReservoirSample(population, 10, avoid, &results); + ReservoirSample(population, 10, avoid, &rng_, &results); ASSERT_EQ(population.size(), results.size()); ASSERT_EQ(population, results); } diff --git a/src/kudu/util/random.h b/src/kudu/util/random.h index e31e475..a3179fe 100644 --- a/src/kudu/util/random.h +++ b/src/kudu/util/random.h @@ -111,40 +111,6 @@ class Random { double NextDoubleFraction() { return Next() / static_cast<double>(random_internal::M + 1.0); } - - // Sample 'k' random elements from the collection 'c' into 'result', taking care not to sample any - // elements that are already present in 'avoid'. - // - // In the case that 'c' has fewer than 'k' elements then all elements in 'c' will be selected. - // - // 'c' should be an iterable STL collection such as a vector, set, or list. - // 'avoid' should be an STL-compatible set. - // - // The results are not stored in a randomized order: the order of results will - // match their order in the input collection. - template<class Collection, class Set, class T> - void ReservoirSample(const Collection& c, int k, const Set& avoid, - std::vector<T>* result) { - result->clear(); - result->reserve(k); - int i = 0; - for (const T& elem : c) { - if (ContainsKey(avoid, elem)) { - continue; - } - i++; - // Fill the reservoir if there is available space. - if (result->size() < k) { - result->push_back(elem); - continue; - } - // Otherwise replace existing elements with decreasing probability. - int j = Uniform(i); - if (j < k) { - (*result)[j] = elem; - } - } - } }; // Thread-safe wrapper around Random. @@ -209,13 +175,6 @@ class ThreadSafeRandom { return random_.NextDoubleFraction(); } - template<class Collection, class Set, class T> - void ReservoirSample(const Collection& c, int k, const Set& avoid, - std::vector<T>* result) { - std::lock_guard<simple_spinlock> l(lock_); - random_.ReservoirSample(c, k, avoid, result); - } - private: simple_spinlock lock_; Random random_; diff --git a/src/kudu/util/random_util.h b/src/kudu/util/random_util.h index bda8c42..9dfc510 100644 --- a/src/kudu/util/random_util.h +++ b/src/kudu/util/random_util.h @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. -#ifndef KUDU_UTIL_RANDOM_UTIL_H -#define KUDU_UTIL_RANDOM_UTIL_H +#pragma once #include <cstdlib> #include <stdint.h> +#include <set> #include <string> +#include <vector> namespace kudu { @@ -39,6 +40,59 @@ std::string RandomString(size_t n, Random* rng); // pid & tid. uint32_t GetRandomSeed32(); +// Returns a randomly-selected element from the container. +template <typename Container, typename T, typename Rand> +T SelectRandomElement(const Container& c, Rand* r) { + CHECK(!c.empty()); + std::vector<T> rand_list; + ReservoirSample(c, 1, std::set<T>{}, r, &rand_list); + return rand_list[0]; +} + +// Returns a randomly-selected subset from the container. +template <typename Container, typename T, typename Rand> +std::vector<T> SelectRandomSubset(const Container& c, int min_to_return, Rand* r) { + CHECK_GT(c.size(), min_to_return); + int num_to_return = min_to_return + r->Uniform(c.size() - min_to_return); + std::vector<T> rand_list; + ReservoirSample(c, num_to_return, std::set<T>{}, r, &rand_list); + return rand_list; +} + +// Sample 'k' random elements from the collection 'c' into 'result', taking +// care not to sample any elements that are already present in 'avoid'. +// +// In the case that 'c' has fewer than 'k' elements then all elements in 'c' +// will be selected. +// +// 'c' should be an iterable STL collection such as a vector, set, or list. +// 'avoid' should be an STL-compatible set. +// +// The results are not stored in a randomized order: the order of results will +// match their order in the input collection. +template<class Collection, class Set, class T, typename Rand> +void ReservoirSample(const Collection& c, int k, const Set& avoid, + Rand* r, std::vector<T>* result) { + result->clear(); + result->reserve(k); + int i = 0; + for (const T& elem : c) { + if (ContainsKey(avoid, elem)) { + continue; + } + i++; + // Fill the reservoir if there is available space. + if (result->size() < k) { + result->push_back(elem); + continue; + } + // Otherwise replace existing elements with decreasing probability. + int j = r->Uniform(i); + if (j < k) { + (*result)[j] = elem; + } + } +} + } // namespace kudu -#endif // KUDU_UTIL_RANDOM_UTIL_H
