This is an automated email from the ASF dual-hosted git repository. awong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 972edb9332dde7cc6fba542343c7a7434ef6cbff Author: Bankim Bhavsar <[email protected]> AuthorDate: Tue Mar 31 12:40:47 2020 -0700 encoding-test: Clean up bitshuffle tests a little Also adds functions in random number generator to generate 128-bit integers. Change-Id: I8b7bee1f952c46c09df5102ac1916141c6935284 Reviewed-on: http://gerrit.cloudera.org:8080/15043 Reviewed-by: Andrew Wong <[email protected]> Tested-by: Kudu Jenkins --- src/kudu/cfile/encoding-test.cc | 58 ++++++++++++++++++++++----------------- src/kudu/util/random.h | 53 +++++++++++++++++++++++++++++++---- src/kudu/util/random_util-test.cc | 50 +++++++++++++++++++++++++++++++++ src/kudu/util/random_util.h | 31 +++++++++++++++++++-- 4 files changed, 158 insertions(+), 34 deletions(-) diff --git a/src/kudu/cfile/encoding-test.cc b/src/kudu/cfile/encoding-test.cc index cb25c1b..007f5d4 100644 --- a/src/kudu/cfile/encoding-test.cc +++ b/src/kudu/cfile/encoding-test.cc @@ -23,6 +23,7 @@ #include <cstdio> #include <cstdlib> #include <functional> +#include <initializer_list> #include <limits> #include <memory> #include <ostream> @@ -436,7 +437,7 @@ class TestEncoding : public KuduTest { } template <DataType Type, class BlockBuilder, class BlockDecoder> - void TestEncodeDecodeTemplateBlockEncoder(typename TypeTraits<Type>::cpp_type* src, + void TestEncodeDecodeTemplateBlockEncoder(const typename TypeTraits<Type>::cpp_type* src, size_t size) { typedef typename TypeTraits<Type>::cpp_type CppType; const uint32_t kOrdinalPosBase = 12345; @@ -719,39 +720,46 @@ TEST_F(TestEncoding, TestPlainBlockEncoder) { // Test for bitshuffle block, for INT32, INT64, INT128, FLOAT, DOUBLE TEST_F(TestEncoding, TestBShufInt32BlockEncoder) { - const uint32_t kSize = 10000; - - unique_ptr<int32_t[]> ints(new int32_t[kSize]); - for (int i = 0; i < kSize; i++) { - ints.get()[i] = random(); + using limits = std::numeric_limits<int32_t>; + Random rng(SeedRandom()); + auto sequences = { + CreateRandomIntegersInRange<int32_t>(10000, 1000, 2000, &rng), + CreateRandomIntegersInRange<int32_t>(10000, 0, limits::max(), &rng), + CreateRandomIntegersInRange<int32_t>(10000, limits::min(), limits::max(), &rng) + }; + for (const auto& ints : sequences) { + TestEncodeDecodeTemplateBlockEncoder<INT32, BShufBlockBuilder<INT32>, + BShufBlockDecoder<INT32> >(ints.data(), ints.size()); } - - TestEncodeDecodeTemplateBlockEncoder<INT32, BShufBlockBuilder<INT32>, - BShufBlockDecoder<INT32>>(ints.get(), kSize); } TEST_F(TestEncoding, TestBShufInt64BlockEncoder) { - const uint32_t kSize = 10000; - - unique_ptr<int64_t[]> ints(new int64_t[kSize]); - for (int i = 0; i < kSize; i++) { - ints.get()[i] = random(); + using limits = std::numeric_limits<int64_t>; + Random rng(SeedRandom()); + auto sequences = { + CreateRandomIntegersInRange<int64_t>(10000, 1000, 2000, &rng), + CreateRandomIntegersInRange<int64_t>(10000, 0, limits::max(), &rng), + CreateRandomIntegersInRange<int64_t>(10000, limits::min(), limits::max(), &rng) + }; + for (const auto& ints : sequences) { + TestEncodeDecodeTemplateBlockEncoder<INT64, BShufBlockBuilder<INT64>, + BShufBlockDecoder<INT64> >(ints.data(), ints.size()); } - - TestEncodeDecodeTemplateBlockEncoder<INT64, BShufBlockBuilder<INT64>, - BShufBlockDecoder<INT64>>(ints.get(), kSize); } TEST_F(TestEncoding, TestBShufInt128BlockEncoder) { - const uint32_t kSize = 10000; - - unique_ptr<int128_t[]> ints(new int128_t[kSize]); - for (int i = 0; i < kSize; i++) { - ints.get()[i] = random(); + Random rng(SeedRandom()); + // As per the note in int128.h, numeric_limits<> on int128_t can give incorrect results. + // Hence using predefined min, max constants. + auto sequences = { + CreateRandomIntegersInRange<int128_t>(10000, 1000, 2000, &rng), + CreateRandomIntegersInRange<int128_t>(10000, 0, INT128_MAX, &rng), + CreateRandomIntegersInRange<int128_t>(10000, INT128_MIN, INT128_MAX, &rng) + }; + for (const auto& ints : sequences) { + TestEncodeDecodeTemplateBlockEncoder<INT128, BShufBlockBuilder<INT128>, + BShufBlockDecoder<INT128> >(ints.data(), ints.size()); } - - TestEncodeDecodeTemplateBlockEncoder<INT128, BShufBlockBuilder<INT128>, - BShufBlockDecoder<INT128>>(ints.get(), kSize); } TEST_F(TestEncoding, TestBShufFloatBlockEncoder) { diff --git a/src/kudu/util/random.h b/src/kudu/util/random.h index 51e578b..9c2722d 100644 --- a/src/kudu/util/random.h +++ b/src/kudu/util/random.h @@ -12,6 +12,7 @@ #include "kudu/gutil/casts.h" #include "kudu/gutil/map-util.h" +#include "kudu/util/int128.h" #include "kudu/util/locks.h" namespace kudu { @@ -82,6 +83,16 @@ class Random { return large; } + // Next pseudo-random 128-bit unsigned integer. + uint128_t Next128() { + uint128_t large = Next64(); + large <<= 64U; + large |= Next64(); + // Next64() provides entire range of numbers up to 64th bit. + // So unlike Next64(), no need to fill MSB bit(s). + return large; + } + // Returns a uniformly distributed value in the range [0..n-1] // REQUIRES: n > 0 uint32_t Uniform(uint32_t n) { return Next() % n; } @@ -93,6 +104,9 @@ class Random { // REQUIRES: n > 0 uint64_t Uniform64(uint64_t n) { return Next64() % n; } + // Returns a uniformly distributed 128-bit value in the range [0..n-1] + uint128_t Uniform128(uint128_t n) { return Next128() % n; } + // Randomly returns true ~"1/n" of the time, and false otherwise. // REQUIRES: n > 0 bool OneIn(int n) { return (Next() % n) == 0; } @@ -140,6 +154,11 @@ class ThreadSafeRandom { return random_.Next64(); } + uint128_t Next128() { + std::lock_guard<simple_spinlock> l(lock_); + return random_.Next128(); + } + uint32_t Uniform(uint32_t n) { std::lock_guard<simple_spinlock> l(lock_); return random_.Uniform(n); @@ -155,6 +174,11 @@ class ThreadSafeRandom { return random_.Uniform64(n); } + uint128_t Uniform128(uint128_t n) { + std::lock_guard<simple_spinlock> l(lock_); + return random_.Uniform128(n); + } + bool OneIn(int n) { std::lock_guard<simple_spinlock> l(lock_); return random_.OneIn(n); @@ -207,18 +231,35 @@ inline double Random::Normal(double mean, double std_dev) { } // Generate next random integer with data-type specified by IntType template -// parameter which must be a 32-bit or 64-bit integer. This constexpr function +// parameter which must be a 32-bit, 64-bit, or 128-bit unsigned integer. This constexpr function // is useful when generating random numbers in a loop with template parameter -// and avoids the run-time cost of determining Next32() v/s Next64() in RNG. +// and avoids the run-time cost of determining Next32() v/s Next64() v/s Next128() in RNG. // // Note: This constexpr function can't be a member function of // Random/ThreadSafeRandom class because it invokes non-const member function. template<typename IntType, class RNG> constexpr IntType GetNextRandom(RNG* rand) { - static_assert( - std::is_integral<IntType>::value && (sizeof(IntType) == 4 || sizeof(IntType) == 8), - "Only 32-bit and 64-bit integers supported"); - return sizeof(IntType) == 4 ? rand->Next32() : rand->Next64(); + // type_traits not defined for 128-bit int data types, hence not checking for integral value. + static_assert(sizeof(IntType) == 4 || sizeof(IntType) == 8 || sizeof(IntType) == 16, + "Only 32-bit, 64-bit, or 128-bit integers supported"); + + // if/switch statement disallowed in constexpr function in C++11. + return sizeof(IntType) == 4 ? rand->Next32() : + sizeof(IntType) == 8 ? rand->Next64() : rand->Next128(); +} + +// Generate next random integer in the [0, n-1] range with data-type specified by IntType template +// parameter which must be a 32-bit, 64-bit, or 128-bit unsigned integer. +// Note: Same comments apply to this constexpr function as GetNextRandom() function above. +template<typename IntType, class RNG> +constexpr IntType GetNextUniformRandom(IntType n, RNG* rand) { + // type_traits not defined for 128-bit int data types, hence not checking for integral value. + static_assert(sizeof(n) == 4 || sizeof(n) == 8 || sizeof(n) == 16, + "Only 32-bit, 64-bit, or 128-bit integers generated"); + + // if/switch statement disallowed in constexpr function in C++11. + return sizeof(n) == 4 ? rand->Uniform32(n) : + sizeof(n) == 8 ? rand->Uniform64(n) : rand->Uniform128(n); } } // namespace kudu diff --git a/src/kudu/util/random_util-test.cc b/src/kudu/util/random_util-test.cc index cf17225..9170aca 100644 --- a/src/kudu/util/random_util-test.cc +++ b/src/kudu/util/random_util-test.cc @@ -19,13 +19,16 @@ #include <cstdint> #include <cstring> +#include <limits> #include <ostream> #include <unordered_set> +#include <vector> #include <glog/logging.h> #include <gtest/gtest.h> #include "kudu/gutil/map-util.h" +#include "kudu/util/int128.h" #include "kudu/util/random.h" #include "kudu/util/test_util.h" @@ -77,6 +80,26 @@ TEST_F(RandomUtilTest, TestRandomString) { CheckEmpty(start, 0, 0, kLenMax); } +// Static helper function for testing generation of random numbers in range. +// CreateRandomUniqueIntegers() doesn't support 128-bit integers yet. Hence a separate +// template function instead of member of TemplateRandomUtilTest class. +// TODO(bankim): Once CreateRandomIntegersInRange() supports 128-bit integers make it a +// member function of TemplateRandomUtilTest class. +template <typename IntType> +static void RunCreateRandomIntegersInRangerHelper(int num_trials, IntType min_val, IntType max_val, + Random* rng) { + static constexpr int kMaxNumVals = 1000; + for (int i = 0; i < num_trials; ++i) { + int num_vals = rng->Uniform(kMaxNumVals); + auto vals = CreateRandomIntegersInRange<IntType>(num_vals, min_val, max_val, rng); + ASSERT_EQ(num_vals, vals.size()); + for (const auto& v : vals) { + ASSERT_GE(v, min_val); + ASSERT_LT(v, max_val); + } + } +} + template<typename IntType> class TemplateRandomUtilTest : public RandomUtilTest { public: @@ -95,6 +118,15 @@ class TemplateRandomUtilTest : public RandomUtilTest { } } } + + void RunCreateRandomIntegersInRange() { + static constexpr IntType min_val = std::numeric_limits<IntType>::min(); + static constexpr IntType max_val = std::numeric_limits<IntType>::max(); + // Exercise entire range. + RunCreateRandomIntegersInRangerHelper(kNumTrials, min_val, max_val, &rng_); + // Exercise partial range. + RunCreateRandomIntegersInRangerHelper(kNumTrials, min_val / 2, max_val / 2, &rng_); + } }; // Testing with char, short data-types will result in compile-time error, as expected. @@ -106,4 +138,22 @@ TYPED_TEST(TemplateRandomUtilTest, RunCreateRandomUniqueIntegers) { this->RunCreateRandomUniqueIntegers(); } +TYPED_TEST(TemplateRandomUtilTest, RunCreateRandomIntegersInRange) { + this->RunCreateRandomIntegersInRange(); +} + +// TODO(bankim): CreateRandomUniqueIntegers() doesn't support 128-bit integers yet. +// Once it does add int128_t and uint128_t types to IntTypes and use +// templatized TemplateRandomUtilTest instead. +TEST_F(RandomUtilTest, RunCreateRandom128BitIntegersInRange) { + // Exercise entire range. + RunCreateRandomIntegersInRangerHelper<int128_t>(kNumTrials, INT128_MIN, INT128_MAX, &rng_); + RunCreateRandomIntegersInRangerHelper<uint128_t>(kNumTrials, 0, UINT128_MAX, &rng_); + + // Exercise partial range. + RunCreateRandomIntegersInRangerHelper<int128_t>(kNumTrials, INT128_MIN / 2, INT128_MAX / 2, + &rng_); + RunCreateRandomIntegersInRangerHelper<uint128_t>(kNumTrials, 0, UINT128_MAX / 2, &rng_); +} + } // namespace kudu diff --git a/src/kudu/util/random_util.h b/src/kudu/util/random_util.h index b89ea3c..4448023 100644 --- a/src/kudu/util/random_util.h +++ b/src/kudu/util/random_util.h @@ -27,6 +27,7 @@ #include <glog/logging.h> #include "kudu/gutil/map-util.h" +#include "kudu/util/int128_util.h" #include "kudu/util/random.h" namespace kudu { @@ -103,13 +104,13 @@ void ReservoirSample(const Collection& c, int k, const Set& avoid, // Generates random and unique 32-bit or 64-bit integers that are not present in // the "avoid" set. +// TODO(bankim): Add support for 128-bit integers by providing hash value for 128-bit data types. template<typename IntType, class Set, class RNG> std::unordered_set<IntType> CreateRandomUniqueIntegers(const int num_values, const Set& avoid, RNG* rand) { - static_assert( - std::is_integral<IntType>::value && (sizeof(IntType) == 4 || sizeof(IntType) == 8), - "Only 32-bit and 64-bit integers generated"); + static_assert(std::is_integral<IntType>::value && (sizeof(IntType) == 4 || sizeof(IntType) == 8), + "Only 32-bit or 64-bit integers generated"); std::unordered_set<IntType> result; while (result.size() < num_values) { @@ -124,5 +125,29 @@ std::unordered_set<IntType> CreateRandomUniqueIntegers(const int num_values, return result; } +// Generates random 32-bit, 64-bit, or 128-bit integers in the [min_val, max_val) range. +template<typename IntType, class RNG> +// When entire range is specified for signed IntType, result of max_val - min_val is -1 +// which when converted to unsigned type correctly represents the max value for the +// same unsigned IntType. Variants of Uniform() in RNG work with unsigned types. +// Hence the ASAN suppression. +ATTRIBUTE_NO_SANITIZE_INTEGER +std::vector<IntType> CreateRandomIntegersInRange(const int num_values, + IntType min_val, + IntType max_val, + RNG* rand) { + // type_traits not defined for 128-bit int data types, hence not checking for integral value. + static_assert(sizeof(IntType) == 4 || sizeof(IntType) == 8 || sizeof(IntType) == 16, + "Only 32-bit, 64-bit, or 128-bit integers generated"); + CHECK_LT(min_val, max_val); + + std::vector<IntType> result(num_values); + for (auto& v : result) { + v = min_val + GetNextUniformRandom(max_val - min_val, rand); + } + return result; +} + + } // namespace kudu
