http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/flags.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/flags.h b/be/src/kudu/util/flags.h new file mode 100644 index 0000000..ae20564 --- /dev/null +++ b/be/src/kudu/util/flags.h @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_FLAGS_H +#define KUDU_UTIL_FLAGS_H + +#include <gflags/gflags.h> +#include <string> +#include <unordered_map> + +#include "kudu/gutil/macros.h" + +namespace kudu { + +// The umask of the process, set based on the --umask flag during +// HandleCommonFlags(). +extern uint32_t g_parsed_umask; + +// Looks for flags in argv and parses them. Rearranges argv to put +// flags first, or removes them entirely if remove_flags is true. +// If a flag is defined more than once in the command line or flag +// file, the last definition is used. Returns the index (into argv) +// of the first non-flag argument. +// +// This is a wrapper around google::ParseCommandLineFlags, but integrates +// with Kudu flag tags. For example, --helpxml will include the list of +// tags for each flag. This should be be used instead of +// google::ParseCommandLineFlags in any user-facing binary. +// +// See gflags.h for more information. +int ParseCommandLineFlags(int* argc, char*** argv, bool remove_flags); + +// Handle common flags such as -version, -disable_core_dumps, etc. +// This includes the GFlags common flags such as "-help". +// +// Requires that flags have already been parsed using +// google::ParseCommandLineNonHelpFlags(). +void HandleCommonFlags(); + +enum class EscapeMode { + HTML, + NONE +}; + +// Stick the flags into a string. If --redact is set with 'flag', +// the values of flags tagged as sensitive will be redacted. Otherwise, +// the values will be written to the string as-is. The values will +// be HTML escaped if EscapeMode is HTML. +std::string CommandlineFlagsIntoString(EscapeMode mode); + +typedef std::unordered_map<std::string, google::CommandLineFlagInfo> GFlagsMap; + +// Get all the flags different from their defaults. The output is a nicely +// formatted string with --flag=value pairs per line. Redact any flags that +// are tagged as sensitive, if --redact is set with 'flag'. +std::string GetNonDefaultFlags(const GFlagsMap& default_flags); + +GFlagsMap GetFlagsMap(); + +} // namespace kudu +#endif /* KUDU_UTIL_FLAGS_H */
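For orientation, here is a minimal sketch (not part of this patch) of how a user-facing binary might call the helpers declared in flags.h above. The flag --example_limit and the surrounding main() are hypothetical; the call ordering simply follows the preconditions stated in the comments above and is not taken from Kudu's actual binaries.

#include <iostream>
#include <gflags/gflags.h>
#include "kudu/util/flags.h"

DEFINE_int32(example_limit, 100, "hypothetical flag, for illustration only");

int main(int argc, char** argv) {
  // Kudu's wrapper around google::ParseCommandLineFlags; it understands the
  // Kudu flag tags and returns the index of the first non-flag argument.
  int first_arg = kudu::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);

  // Handle common flags such as --version, --disable_core_dumps, --umask.
  kudu::HandleCommonFlags();

  // Dump the effective flags; values of flags tagged as sensitive are redacted
  // when --redact is set with 'flag', and EscapeMode::NONE skips HTML escaping.
  std::cout << kudu::CommandlineFlagsIntoString(kudu::EscapeMode::NONE) << std::endl;

  // ... real work would start at argv[first_arg], using FLAGS_example_limit ...
  return 0;
}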
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/group_varint-inl.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/group_varint-inl.h b/be/src/kudu/util/group_varint-inl.h new file mode 100644 index 0000000..8f418b6 --- /dev/null +++ b/be/src/kudu/util/group_varint-inl.h @@ -0,0 +1,268 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_GROUP_VARINT_INL_H +#define KUDU_UTIL_GROUP_VARINT_INL_H + +#include <boost/utility/binary.hpp> +#include <glog/logging.h> +#include <stdint.h> +#include <smmintrin.h> + +#include "kudu/util/faststring.h" + +namespace kudu { +namespace coding { + +extern bool SSE_TABLE_INITTED; +extern uint8_t SSE_TABLE[256 * 16] __attribute__((aligned(16))); +extern uint8_t VARINT_SELECTOR_LENGTHS[256]; + +const uint32_t MASKS[4] = { 0xff, 0xffff, 0xffffff, 0xffffffff }; + + +// Calculate the number of bytes to encode the given unsigned int. +inline size_t CalcRequiredBytes32(uint32_t i) { + // | 1 because the result is undefined for the 0 case + return sizeof(uint32_t) - __builtin_clz(i|1)/8; +} + +// Decode a set of 4 group-varint encoded integers from the given pointer. +// +// Requires that there are at up to 3 extra bytes remaining in 'src' after +// the last integer. +// +// Returns a pointer following the last decoded integer. +inline const uint8_t *DecodeGroupVarInt32( + const uint8_t *src, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { + + uint8_t a_sel = (*src & BOOST_BINARY(11 00 00 00)) >> 6; + uint8_t b_sel = (*src & BOOST_BINARY(00 11 00 00)) >> 4; + uint8_t c_sel = (*src & BOOST_BINARY(00 00 11 00)) >> 2; + uint8_t d_sel = (*src & BOOST_BINARY(00 00 00 11 )); + + src++; // skip past selector byte + + *a = *reinterpret_cast<const uint32_t *>(src) & MASKS[a_sel]; + src += a_sel + 1; + + *b = *reinterpret_cast<const uint32_t *>(src) & MASKS[b_sel]; + src += b_sel + 1; + + *c = *reinterpret_cast<const uint32_t *>(src) & MASKS[c_sel]; + src += c_sel + 1; + + *d = *reinterpret_cast<const uint32_t *>(src) & MASKS[d_sel]; + src += d_sel + 1; + + return src; +} + +// Decode total length of the encoded integers from the given pointer, +// include the tag byte. +inline size_t DecodeGroupVarInt32_GetGroupSize(const uint8_t *src) { + return VARINT_SELECTOR_LENGTHS[*src] + 1; +} + +// Decode a set of 4 group-varint encoded integers from the given pointer. +// +// Returns a pointer following the last decoded integer. 
+inline const uint8_t *DecodeGroupVarInt32_SlowButSafe( + const uint8_t *src, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { + + // VARINT_SELECTOR_LENGTHS[] isn't initialized until SSE_TABLE_INITTED is true + DCHECK(SSE_TABLE_INITTED); + + const size_t total_len = DecodeGroupVarInt32_GetGroupSize(src); + + uint8_t safe_buf[17]; + memcpy(safe_buf, src, total_len); + DecodeGroupVarInt32(safe_buf, a, b, c, d); + return src + total_len; +} + + +inline void DoExtractM128(__m128i results, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { +#define SSE_USE_EXTRACT_PS +#ifdef SSE_USE_EXTRACT_PS + // _mm_extract_ps turns into extractps, which is slightly faster + // than _mm_extract_epi32 (which turns into pextrd) + // Apparently pextrd involves one more micro-op + // than extractps. + // + // A uint32 cfile macro-benchmark is about 3% faster with this code path. + *a = _mm_extract_ps((__v4sf)results, 0); + *b = _mm_extract_ps((__v4sf)results, 1); + *c = _mm_extract_ps((__v4sf)results, 2); + *d = _mm_extract_ps((__v4sf)results, 3); +#else + *a = _mm_extract_epi32(results, 0); + *b = _mm_extract_epi32(results, 1); + *c = _mm_extract_epi32(results, 2); + *d = _mm_extract_epi32(results, 3); +#endif +} + +// Same as above, but uses SSE so may be faster. +// TODO: remove this and just automatically pick the right implementation at runtime. +// +// NOTE: the src buffer must be have at least 17 bytes remaining in it, so this +// code path is not usable at the end of a block. +inline const uint8_t *DecodeGroupVarInt32_SSE( + const uint8_t *src, + uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { + + DCHECK(SSE_TABLE_INITTED); + + uint8_t sel_byte = *src++; + __m128i shuffle_mask = _mm_load_si128( + reinterpret_cast<__m128i *>(&SSE_TABLE[sel_byte * 16])); + __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src)); + + __m128i results = _mm_shuffle_epi8(data, shuffle_mask); + + // It would look like the following would be most efficient, + // since it turns into a single movdqa instruction: + // *reinterpret_cast<__m128i *>(ret) = results; + // (where ret is an aligned array of ints, which the user must pass) + // but it is actually slower than the below alternatives by a + // good amount -- even though these result in more instructions. + DoExtractM128(results, a, b, c, d); + src += VARINT_SELECTOR_LENGTHS[sel_byte]; + + return src; +} + +// Optimized function which decodes a group of uint32s from 'src' into 'ret', +// which should have enough space for 4 uint32s. During decoding, adds 'add' +// to the vector in parallel. +// +// NOTE: the src buffer must be have at least 17 bytes remaining in it, so this +// code path is not usable at the end of a block. +inline const uint8_t *DecodeGroupVarInt32_SSE_Add( + const uint8_t *src, + uint32_t *ret, + __m128i add) { + + DCHECK(SSE_TABLE_INITTED); + + uint8_t sel_byte = *src++; + __m128i shuffle_mask = _mm_load_si128( + reinterpret_cast<__m128i *>(&SSE_TABLE[sel_byte * 16])); + __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src)); + + __m128i decoded_deltas = _mm_shuffle_epi8(data, shuffle_mask); + __m128i results = _mm_add_epi32(decoded_deltas, add); + + DoExtractM128(results, &ret[0], &ret[1], &ret[2], &ret[3]); + + src += VARINT_SELECTOR_LENGTHS[sel_byte]; + return src; +} + + +// Append a set of group-varint encoded integers to the given faststring. 
+inline void AppendGroupVarInt32( + faststring *s, + uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + + uint8_t a_tag = CalcRequiredBytes32(a) - 1; + uint8_t b_tag = CalcRequiredBytes32(b) - 1; + uint8_t c_tag = CalcRequiredBytes32(c) - 1; + uint8_t d_tag = CalcRequiredBytes32(d) - 1; + + uint8_t prefix_byte = + (a_tag << 6) | + (b_tag << 4) | + (c_tag << 2) | + (d_tag); + + uint8_t size = 1 + + a_tag + 1 + + b_tag + 1 + + c_tag + 1 + + d_tag + 1; + + size_t old_size = s->size(); + + // Reserving 4 extra bytes means we can use simple + // 4-byte stores instead of variable copies here -- + // if we hang off the end of the array into the "empty" area, it's OK. + // We'll chop it back off down below. + s->resize(old_size + size + 4); + uint8_t *ptr = &((*s)[old_size]); + +#if __BYTE_ORDER != __LITTLE_ENDIAN +#error dont support big endian currently +#endif + + *ptr++ = prefix_byte; + memcpy(ptr, &a, 4); + ptr += a_tag + 1; + memcpy(ptr, &b, 4); + ptr += b_tag + 1; + memcpy(ptr, &c, 4); + ptr += c_tag + 1; + memcpy(ptr, &d, 4); + + s->resize(old_size + size); +} + +// Append a sequence of uint32s encoded using group-varint. +// +// 'frame_of_reference' is also subtracted from each integer +// before encoding. +// +// If frame_of_reference is greater than any element in the array, +// results are undefined. +// +// For best performance, users should already have reserved adequate +// space in 's' (CalcRequiredBytes32 can be handy here) +inline void AppendGroupVarInt32Sequence(faststring *s, uint32_t frame_of_reference, + uint32_t *ints, size_t size) { + uint32_t *p = ints; + while (size >= 4) { + AppendGroupVarInt32(s, + p[0] - frame_of_reference, + p[1] - frame_of_reference, + p[2] - frame_of_reference, + p[3] - frame_of_reference); + size -= 4; + p += 4; + } + + + uint32_t trailer[4] = {0, 0, 0, 0}; + uint32_t *trailer_p = &trailer[0]; + + if (size > 0) { + while (size > 0) { + *trailer_p++ = *p++ - frame_of_reference; + size--; + } + + AppendGroupVarInt32(s, trailer[0], trailer[1], trailer[2], trailer[3]); + } +} + + +} // namespace coding +} // namespace kudu + +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/group_varint-test.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/group_varint-test.cc b/be/src/kudu/util/group_varint-test.cc new file mode 100644 index 0000000..62176ef --- /dev/null +++ b/be/src/kudu/util/group_varint-test.cc @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
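Before the tests, a self-contained sketch (not part of the patch) of the byte layout these routines produce may help: AppendGroupVarInt32 writes one selector byte holding four 2-bit length codes (length minus one, highest bits first), followed by each value in little-endian order using that many bytes. The hand-written 7-byte input below matches the expectation checked in TestGroupVarInt further down; the decoder here is a simplified stand-in, not the Kudu implementation.

#include <cstdint>
#include <cstdio>

// Decode one group of four group-varint values the "slow" way: read the
// selector byte, then copy (selector_bits + 1) little-endian bytes per value.
static const uint8_t* DecodeOneGroup(const uint8_t* src, uint32_t out[4]) {
  uint8_t sel = *src++;
  for (int i = 0; i < 4; i++) {
    int num_bytes = ((sel >> (6 - 2 * i)) & 3) + 1;  // 2 selector bits per value
    uint32_t v = 0;
    for (int b = 0; b < num_bytes; b++) {
      v |= static_cast<uint32_t>(*src++) << (8 * b);  // little-endian
    }
    out[i] = v;
  }
  return src;
}

int main() {
  // The 7-byte encoding of (256, 2, 3, 65535): selector 0b01000001
  // (lengths 2, 1, 1, 2), then the value bytes.
  const uint8_t buf[] = { 0x41, 0x00, 0x01, 0x02, 0x03, 0xff, 0xff };
  uint32_t vals[4];
  DecodeOneGroup(buf, vals);
  std::printf("%u %u %u %u\n", vals[0], vals[1], vals[2], vals[3]);  // 256 2 3 65535
  return 0;
}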
+ +#include <glog/logging.h> +#include <gtest/gtest.h> +#include <vector> + +#include "kudu/util/group_varint-inl.h" +#include "kudu/util/stopwatch.h" + +namespace kudu { +namespace coding { + +extern void DumpSSETable(); + +// Encodes the given four ints as group-varint, then +// decodes and ensures the result is the same. +static void DoTestRoundTripGVI32( + uint32_t a, uint32_t b, uint32_t c, uint32_t d, + bool use_sse = false) { + faststring buf; + AppendGroupVarInt32(&buf, a, b, c, d); + + int real_size = buf.size(); + + // The implementations actually read past the group varint, + // so append some extra padding data to ensure that it's not reading + // uninitialized memory. The SSE implementation uses 128-bit reads + // and the non-SSE one uses 32-bit reads. + buf.append(string('x', use_sse ? 16 : 4)); + + uint32_t ret[4]; + + const uint8_t *end; + + if (use_sse) { + end = DecodeGroupVarInt32_SSE( + buf.data(), &ret[0], &ret[1], &ret[2], &ret[3]); + } else { + end = DecodeGroupVarInt32( + buf.data(), &ret[0], &ret[1], &ret[2], &ret[3]); + } + + ASSERT_EQ(a, ret[0]); + ASSERT_EQ(b, ret[1]); + ASSERT_EQ(c, ret[2]); + ASSERT_EQ(d, ret[3]); + ASSERT_EQ(end, buf.data() + real_size); +} + + +TEST(TestGroupVarInt, TestSSETable) { + DumpSSETable(); + faststring buf; + AppendGroupVarInt32(&buf, 0, 0, 0, 0); + DoTestRoundTripGVI32(0, 0, 0, 0, true); + DoTestRoundTripGVI32(1, 2, 3, 4, true); + DoTestRoundTripGVI32(1, 2000, 3, 200000, true); +} + +TEST(TestGroupVarInt, TestGroupVarInt) { + faststring buf; + AppendGroupVarInt32(&buf, 0, 0, 0, 0); + ASSERT_EQ(5UL, buf.size()); + ASSERT_EQ(0, memcmp("\x00\x00\x00\x00\x00", buf.data(), 5)); + buf.clear(); + + // All 1-byte + AppendGroupVarInt32(&buf, 1, 2, 3, 254); + ASSERT_EQ(5UL, buf.size()); + ASSERT_EQ(0, memcmp("\x00\x01\x02\x03\xfe", buf.data(), 5)); + buf.clear(); + + // Mixed 1-byte and 2-byte + AppendGroupVarInt32(&buf, 256, 2, 3, 65535); + ASSERT_EQ(7UL, buf.size()); + ASSERT_EQ(BOOST_BINARY(01 00 00 01), buf.at(0)); + ASSERT_EQ(256, *reinterpret_cast<const uint16_t *>(&buf[1])); + ASSERT_EQ(2, *reinterpret_cast<const uint8_t *>(&buf[3])); + ASSERT_EQ(3, *reinterpret_cast<const uint8_t *>(&buf[4])); + ASSERT_EQ(65535, *reinterpret_cast<const uint16_t *>(&buf[5])); +} + + +// Round-trip encode/decodes using group varint +TEST(TestGroupVarInt, TestRoundTrip) { + // A few simple tests. + DoTestRoundTripGVI32(0, 0, 0, 0); + DoTestRoundTripGVI32(1, 2, 3, 4); + DoTestRoundTripGVI32(1, 2000, 3, 200000); + + // Then a randomized test. 
+ for (int i = 0; i < 10000; i++) { + DoTestRoundTripGVI32(random(), random(), random(), random()); + } +} + +#ifdef NDEBUG +TEST(TestGroupVarInt, EncodingBenchmark) { + int n_ints = 1000000; + + std::vector<uint32_t> ints; + ints.reserve(n_ints); + for (int i = 0; i < n_ints; i++) { + ints.push_back(i); + } + + faststring s; + // conservative reservation + s.reserve(ints.size() * 4); + + LOG_TIMING(INFO, "Benchmark") { + for (int i = 0; i < 100; i++) { + s.clear(); + AppendGroupVarInt32Sequence(&s, 0, &ints[0], n_ints); + } + } +} +#endif +} // namespace coding +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/group_varint.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/group_varint.cc b/be/src/kudu/util/group_varint.cc new file mode 100644 index 0000000..53c362b --- /dev/null +++ b/be/src/kudu/util/group_varint.cc @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <stdint.h> +#include <string.h> +#include <boost/utility/binary.hpp> +#include <glog/logging.h> + +#include "kudu/util/group_varint-inl.h" +#include "kudu/util/hexdump.h" +#include "kudu/util/slice.h" + +namespace kudu { +namespace coding { + +bool SSE_TABLE_INITTED = false; +uint8_t SSE_TABLE[256 * 16] __attribute__((aligned(16))); +uint8_t VARINT_SELECTOR_LENGTHS[256]; + +__attribute__((constructor)) +static void InitializeSSETables() { + memset(SSE_TABLE, 0xff, sizeof(SSE_TABLE)); + + for (int i = 0; i < 256; i++) { + uint32_t *entry = reinterpret_cast<uint32_t *>(&SSE_TABLE[i * 16]); + + uint8_t selectors[] = { + static_cast<uint8_t>((i & BOOST_BINARY(11 00 00 00)) >> 6), + static_cast<uint8_t>((i & BOOST_BINARY(00 11 00 00)) >> 4), + static_cast<uint8_t>((i & BOOST_BINARY(00 00 11 00)) >> 2), + static_cast<uint8_t>((i & BOOST_BINARY(00 00 00 11))) }; + + // 00000000 -> + // 00 ff ff ff 01 ff ff ff 02 ff ff ff 03 ff ff ff + + // 01000100 -> + // 00 01 ff ff 02 ff ff ff 03 04 ff ff 05 ff ff ff + + uint8_t offset = 0; + + for (int j = 0; j < 4; j++) { + uint8_t num_bytes = selectors[j] + 1; + uint8_t *entry_bytes = reinterpret_cast<uint8_t *>(&entry[j]); + + for (int k = 0; k < num_bytes; k++) { + *entry_bytes++ = offset++; + } + } + + VARINT_SELECTOR_LENGTHS[i] = offset; + } + + SSE_TABLE_INITTED = true; +} + +void DumpSSETable() { + LOG(INFO) << "SSE table:\n" + << kudu::HexDump(Slice(SSE_TABLE, sizeof(SSE_TABLE))); +} + + + +} // namespace coding +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/hash_util-test.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hash_util-test.cc b/be/src/kudu/util/hash_util-test.cc new file mode 
100644 index 0000000..a88f275 --- /dev/null +++ b/be/src/kudu/util/hash_util-test.cc @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/util/test_util.h" + +#include "kudu/util/hash_util.h" + +namespace kudu { + +// Test Murmur2 Hash64 returns the expected values for inputs. These tests are +// duplicated on the Java side to ensure that hash computations are stable +// across both platforms. +TEST(HashUtilTest, TestMurmur2Hash64) { + uint64_t hash; + + hash = HashUtil::MurmurHash2_64("ab", 2, 0); + ASSERT_EQ(7115271465109541368, hash); + + hash = HashUtil::MurmurHash2_64("abcdefg", 7, 0); + ASSERT_EQ(2601573339036254301, hash); + + hash = HashUtil::MurmurHash2_64("quick brown fox", 15, 42); + ASSERT_EQ(3575930248840144026, hash); +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/hash_util.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hash_util.h b/be/src/kudu/util/hash_util.h new file mode 100644 index 0000000..7892252 --- /dev/null +++ b/be/src/kudu/util/hash_util.h @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <stdint.h> + +#ifndef KUDU_UTIL_HASH_UTIL_H +#define KUDU_UTIL_HASH_UTIL_H + +namespace kudu { + +/// Utility class to compute hash values. +class HashUtil { + public: + + static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995; + static const int MURMUR_R = 47; + + /// Murmur2 hash implementation returning 64-bit hashes. 
+ static uint64_t MurmurHash2_64(const void* input, int len, uint64_t seed) { + uint64_t h = seed ^ (len * MURMUR_PRIME); + + const uint64_t* data = reinterpret_cast<const uint64_t*>(input); + const uint64_t* end = data + (len / sizeof(uint64_t)); + + while (data != end) { + uint64_t k = *data++; + k *= MURMUR_PRIME; + k ^= k >> MURMUR_R; + k *= MURMUR_PRIME; + h ^= k; + h *= MURMUR_PRIME; + } + + const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data); + switch (len & 7) { + case 7: h ^= static_cast<uint64_t>(data2[6]) << 48; + case 6: h ^= static_cast<uint64_t>(data2[5]) << 40; + case 5: h ^= static_cast<uint64_t>(data2[4]) << 32; + case 4: h ^= static_cast<uint64_t>(data2[3]) << 24; + case 3: h ^= static_cast<uint64_t>(data2[2]) << 16; + case 2: h ^= static_cast<uint64_t>(data2[1]) << 8; + case 1: h ^= static_cast<uint64_t>(data2[0]); + h *= MURMUR_PRIME; + } + + h ^= h >> MURMUR_R; + h *= MURMUR_PRIME; + h ^= h >> MURMUR_R; + return h; + } +}; + +} // namespace kudu +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/hdr_histogram-test.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hdr_histogram-test.cc b/be/src/kudu/util/hdr_histogram-test.cc new file mode 100644 index 0000000..bf1c101 --- /dev/null +++ b/be/src/kudu/util/hdr_histogram-test.cc @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include <gtest/gtest.h> + +#include "kudu/util/hdr_histogram.h" +#include "kudu/util/status.h" +#include "kudu/util/test_util.h" + +namespace kudu { + +static const int kSigDigits = 2; + +class HdrHistogramTest : public KuduTest { +}; + +TEST_F(HdrHistogramTest, SimpleTest) { + uint64_t highest_val = 10000LU; + + HdrHistogram hist(highest_val, kSigDigits); + ASSERT_EQ(0, hist.CountInBucketForValue(1)); + hist.Increment(1); + ASSERT_EQ(1, hist.CountInBucketForValue(1)); + hist.IncrementBy(1, 3); + ASSERT_EQ(4, hist.CountInBucketForValue(1)); + hist.Increment(10); + ASSERT_EQ(1, hist.CountInBucketForValue(10)); + hist.Increment(20); + ASSERT_EQ(1, hist.CountInBucketForValue(20)); + ASSERT_EQ(0, hist.CountInBucketForValue(1000)); + hist.Increment(1000); + hist.Increment(1001); + ASSERT_EQ(2, hist.CountInBucketForValue(1000)); + + ASSERT_EQ(1 + 1 * 3 + 10 + 20 + 1000 + 1001, + hist.TotalSum()); +} + +TEST_F(HdrHistogramTest, TestCoordinatedOmission) { + uint64_t interval = 1000; + int loop_iters = 100; + int64_t normal_value = 10; + HdrHistogram hist(1000000LU, kSigDigits); + for (int i = 1; i <= loop_iters; i++) { + // Simulate a periodic "large value" that would exhibit coordinated + // omission were this loop to sleep on 'interval'. + int64_t value = (i % normal_value == 0) ? 
interval * 10 : normal_value; + + hist.IncrementWithExpectedInterval(value, interval); + } + ASSERT_EQ(loop_iters - (loop_iters / normal_value), + hist.CountInBucketForValue(normal_value)); + for (int i = interval; i <= interval * 10; i += interval) { + ASSERT_EQ(loop_iters / normal_value, hist.CountInBucketForValue(i)); + } +} + +static const int kExpectedSum = + 10 * 80 + 100 * 10 + 1000 * 5 + 10000 * 3 + 100000 * 1 + 1000000 * 1; +static const int kExpectedMax = 1000000; +static const int kExpectedCount = 100; +static const int kExpectedMin = 10; +static void load_percentiles(HdrHistogram* hist) { + hist->IncrementBy(10, 80); + hist->IncrementBy(100, 10); + hist->IncrementBy(1000, 5); + hist->IncrementBy(10000, 3); + hist->IncrementBy(100000, 1); + hist->IncrementBy(1000000, 1); +} + +static void validate_percentiles(HdrHistogram* hist, uint64_t specified_max) { + double expected_mean = + static_cast<double>(kExpectedSum) / (80 + 10 + 5 + 3 + 1 + 1); + + ASSERT_EQ(kExpectedMin, hist->MinValue()); + ASSERT_EQ(kExpectedMax, hist->MaxValue()); + ASSERT_EQ(kExpectedSum, hist->TotalSum()); + ASSERT_NEAR(expected_mean, hist->MeanValue(), 0.001); + ASSERT_EQ(kExpectedCount, hist->TotalCount()); + ASSERT_EQ(10, hist->ValueAtPercentile(80)); + ASSERT_EQ(kExpectedCount, hist->ValueAtPercentile(90)); + ASSERT_EQ(hist->LowestEquivalentValue(specified_max), hist->ValueAtPercentile(99)); + ASSERT_EQ(hist->LowestEquivalentValue(specified_max), hist->ValueAtPercentile(99.99)); + ASSERT_EQ(hist->LowestEquivalentValue(specified_max), hist->ValueAtPercentile(100)); +} + +TEST_F(HdrHistogramTest, PercentileAndCopyTest) { + uint64_t specified_max = 10000; + HdrHistogram hist(specified_max, kSigDigits); + load_percentiles(&hist); + NO_FATALS(validate_percentiles(&hist, specified_max)); + + HdrHistogram copy(hist); + NO_FATALS(validate_percentiles(©, specified_max)); + + ASSERT_EQ(hist.TotalSum(), copy.TotalSum()); +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/hdr_histogram.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hdr_histogram.cc b/be/src/kudu/util/hdr_histogram.cc new file mode 100644 index 0000000..e510df3 --- /dev/null +++ b/be/src/kudu/util/hdr_histogram.cc @@ -0,0 +1,497 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// Portions of these classes were ported from Java to C++ from the sources +// available at https://github.com/HdrHistogram/HdrHistogram . 
+// +// The code in this repository code was Written by Gil Tene, Michael Barker, +// and Matt Warren, and released to the public domain, as explained at +// http://creativecommons.org/publicdomain/zero/1.0/ +#include "kudu/util/hdr_histogram.h" + +#include <algorithm> +#include <cmath> +#include <limits> + +#include "kudu/gutil/atomicops.h" +#include "kudu/gutil/bits.h" +#include "kudu/gutil/strings/substitute.h" +#include "kudu/util/status.h" + +using base::subtle::Atomic64; +using base::subtle::NoBarrier_AtomicIncrement; +using base::subtle::NoBarrier_Store; +using base::subtle::NoBarrier_Load; +using base::subtle::NoBarrier_CompareAndSwap; +using strings::Substitute; + +namespace kudu { + +HdrHistogram::HdrHistogram(uint64_t highest_trackable_value, int num_significant_digits) + : highest_trackable_value_(highest_trackable_value), + num_significant_digits_(num_significant_digits), + counts_array_length_(0), + bucket_count_(0), + sub_bucket_count_(0), + sub_bucket_half_count_magnitude_(0), + sub_bucket_half_count_(0), + sub_bucket_mask_(0), + total_count_(0), + total_sum_(0), + min_value_(std::numeric_limits<Atomic64>::max()), + max_value_(0), + counts_(nullptr) { + Init(); +} + +HdrHistogram::HdrHistogram(const HdrHistogram& other) + : highest_trackable_value_(other.highest_trackable_value_), + num_significant_digits_(other.num_significant_digits_), + counts_array_length_(0), + bucket_count_(0), + sub_bucket_count_(0), + sub_bucket_half_count_magnitude_(0), + sub_bucket_half_count_(0), + sub_bucket_mask_(0), + total_count_(0), + total_sum_(0), + min_value_(std::numeric_limits<Atomic64>::max()), + max_value_(0), + counts_(nullptr) { + Init(); + + // Not a consistent snapshot but we try to roughly keep it close. + // Copy the sum and min first. + NoBarrier_Store(&total_sum_, NoBarrier_Load(&other.total_sum_)); + NoBarrier_Store(&min_value_, NoBarrier_Load(&other.min_value_)); + + uint64_t total_copied_count = 0; + // Copy the counts in order of ascending magnitude. + for (int i = 0; i < counts_array_length_; i++) { + uint64_t count = NoBarrier_Load(&other.counts_[i]); + NoBarrier_Store(&counts_[i], count); + total_copied_count += count; + } + // Copy the max observed value last. + NoBarrier_Store(&max_value_, NoBarrier_Load(&other.max_value_)); + // We must ensure the total is consistent with the copied counts. + NoBarrier_Store(&total_count_, total_copied_count); +} + +bool HdrHistogram::IsValidHighestTrackableValue(uint64_t highest_trackable_value) { + return highest_trackable_value >= kMinHighestTrackableValue; +} + +bool HdrHistogram::IsValidNumSignificantDigits(int num_significant_digits) { + return num_significant_digits >= kMinValidNumSignificantDigits && + num_significant_digits <= kMaxValidNumSignificantDigits; +} + +void HdrHistogram::Init() { + // Verify parameter validity + CHECK(IsValidHighestTrackableValue(highest_trackable_value_)) << + Substitute("highest_trackable_value must be >= $0", kMinHighestTrackableValue); + CHECK(IsValidNumSignificantDigits(num_significant_digits_)) << + Substitute("num_significant_digits must be between $0 and $1", + kMinValidNumSignificantDigits, kMaxValidNumSignificantDigits); + + uint32_t largest_value_with_single_unit_resolution = + 2 * static_cast<uint32_t>(pow(10.0, num_significant_digits_)); + + // We need to maintain power-of-two sub_bucket_count_ (for clean direct + // indexing) that is large enough to provide unit resolution to at least + // largest_value_with_single_unit_resolution. 
So figure out + // largest_value_with_single_unit_resolution's nearest power-of-two + // (rounded up), and use that: + + // The sub-buckets take care of the precision. + // Each sub-bucket is sized to have enough bits for the requested + // 10^precision accuracy. + int sub_bucket_count_magnitude = + Bits::Log2Ceiling(largest_value_with_single_unit_resolution); + sub_bucket_half_count_magnitude_ = + (sub_bucket_count_magnitude >= 1) ? sub_bucket_count_magnitude - 1 : 0; + + // sub_bucket_count_ is approx. 10^num_sig_digits (as a power of 2) + sub_bucket_count_ = pow(2.0, sub_bucket_half_count_magnitude_ + 1); + sub_bucket_mask_ = sub_bucket_count_ - 1; + sub_bucket_half_count_ = sub_bucket_count_ / 2; + + // The buckets take care of the magnitude. + // Determine exponent range needed to support the trackable value with no + // overflow: + uint64_t trackable_value = sub_bucket_count_ - 1; + int buckets_needed = 1; + while (trackable_value < highest_trackable_value_) { + trackable_value <<= 1; + buckets_needed++; + } + bucket_count_ = buckets_needed; + + counts_array_length_ = (bucket_count_ + 1) * sub_bucket_half_count_; + counts_.reset(new Atomic64[counts_array_length_]()); // value-initialized +} + +void HdrHistogram::Increment(int64_t value) { + IncrementBy(value, 1); +} + +void HdrHistogram::IncrementBy(int64_t value, int64_t count) { + DCHECK_GE(value, 0); + DCHECK_GE(count, 0); + + // Dissect the value into bucket and sub-bucket parts, and derive index into + // counts array: + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + int counts_index = CountsArrayIndex(bucket_index, sub_bucket_index); + + // Increment bucket, total, and sum. + NoBarrier_AtomicIncrement(&counts_[counts_index], count); + NoBarrier_AtomicIncrement(&total_count_, count); + NoBarrier_AtomicIncrement(&total_sum_, value * count); + + // Update min, if needed. + { + Atomic64 min_val; + while (PREDICT_FALSE(value < (min_val = MinValue()))) { + Atomic64 old_val = NoBarrier_CompareAndSwap(&min_value_, min_val, value); + if (PREDICT_TRUE(old_val == min_val)) break; // CAS success. + } + } + + // Update max, if needed. + { + Atomic64 max_val; + while (PREDICT_FALSE(value > (max_val = MaxValue()))) { + Atomic64 old_val = NoBarrier_CompareAndSwap(&max_value_, max_val, value); + if (PREDICT_TRUE(old_val == max_val)) break; // CAS success. + } + } +} + +void HdrHistogram::IncrementWithExpectedInterval(int64_t value, + int64_t expected_interval_between_samples) { + Increment(value); + if (expected_interval_between_samples <= 0) { + return; + } + for (int64_t missing_value = value - expected_interval_between_samples; + missing_value >= expected_interval_between_samples; + missing_value -= expected_interval_between_samples) { + Increment(missing_value); + } +} + +//////////////////////////////////// + +int HdrHistogram::BucketIndex(uint64_t value) const { + if (PREDICT_FALSE(value > highest_trackable_value_)) { + value = highest_trackable_value_; + } + // Here we are calculating the power-of-2 magnitude of the value with a + // correction for precision in the first bucket. + // Smallest power of 2 containing value. 
+ int pow2ceiling = Bits::Log2Ceiling64(value | sub_bucket_mask_); + return pow2ceiling - (sub_bucket_half_count_magnitude_ + 1); +} + +int HdrHistogram::SubBucketIndex(uint64_t value, int bucket_index) const { + if (PREDICT_FALSE(value > highest_trackable_value_)) { + value = highest_trackable_value_; + } + // We hack off the magnitude and are left with only the relevant precision + // portion, which gives us a direct index into the sub-bucket. TODO: Right?? + return static_cast<int>(value >> bucket_index); +} + +int HdrHistogram::CountsArrayIndex(int bucket_index, int sub_bucket_index) const { + DCHECK(sub_bucket_index < sub_bucket_count_); + DCHECK(bucket_index < bucket_count_); + DCHECK(bucket_index == 0 || (sub_bucket_index >= sub_bucket_half_count_)); + // Calculate the index for the first entry in the bucket: + // (The following is the equivalent of ((bucket_index + 1) * sub_bucket_half_count_) ): + int bucket_base_index = (bucket_index + 1) << sub_bucket_half_count_magnitude_; + // Calculate the offset in the bucket: + int offset_in_bucket = sub_bucket_index - sub_bucket_half_count_; + return bucket_base_index + offset_in_bucket; +} + +uint64_t HdrHistogram::CountAt(int bucket_index, int sub_bucket_index) const { + return counts_[CountsArrayIndex(bucket_index, sub_bucket_index)]; +} + +uint64_t HdrHistogram::CountInBucketForValue(uint64_t value) const { + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + return CountAt(bucket_index, sub_bucket_index); +} + +uint64_t HdrHistogram::ValueFromIndex(int bucket_index, int sub_bucket_index) { + return static_cast<uint64_t>(sub_bucket_index) << bucket_index; +} + +//////////////////////////////////// + +uint64_t HdrHistogram::SizeOfEquivalentValueRange(uint64_t value) const { + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + uint64_t distance_to_next_value = + (1 << ((sub_bucket_index >= sub_bucket_count_) ? 
(bucket_index + 1) : bucket_index)); + return distance_to_next_value; +} + +uint64_t HdrHistogram::LowestEquivalentValue(uint64_t value) const { + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + uint64_t this_value_base_level = ValueFromIndex(bucket_index, sub_bucket_index); + return this_value_base_level; +} + +uint64_t HdrHistogram::HighestEquivalentValue(uint64_t value) const { + return NextNonEquivalentValue(value) - 1; +} + +uint64_t HdrHistogram::MedianEquivalentValue(uint64_t value) const { + return (LowestEquivalentValue(value) + (SizeOfEquivalentValueRange(value) >> 1)); +} + +uint64_t HdrHistogram::NextNonEquivalentValue(uint64_t value) const { + return LowestEquivalentValue(value) + SizeOfEquivalentValueRange(value); +} + +bool HdrHistogram::ValuesAreEquivalent(uint64_t value1, uint64_t value2) const { + return (LowestEquivalentValue(value1) == LowestEquivalentValue(value2)); +} + +uint64_t HdrHistogram::MinValue() const { + if (PREDICT_FALSE(TotalCount() == 0)) return 0; + return NoBarrier_Load(&min_value_); +} + +uint64_t HdrHistogram::MaxValue() const { + if (PREDICT_FALSE(TotalCount() == 0)) return 0; + return NoBarrier_Load(&max_value_); +} + +double HdrHistogram::MeanValue() const { + uint64_t count = TotalCount(); + if (PREDICT_FALSE(count == 0)) return 0.0; + return static_cast<double>(TotalSum()) / count; +} + +uint64_t HdrHistogram::ValueAtPercentile(double percentile) const { + uint64_t count = TotalCount(); + if (PREDICT_FALSE(count == 0)) return 0; + + double requested_percentile = std::min(percentile, 100.0); // Truncate down to 100% + uint64_t count_at_percentile = + static_cast<uint64_t>(((requested_percentile / 100.0) * count) + 0.5); // Round + // Make sure we at least reach the first recorded entry + count_at_percentile = std::max(count_at_percentile, static_cast<uint64_t>(1)); + + uint64_t total_to_current_iJ = 0; + for (int i = 0; i < bucket_count_; i++) { + int j = (i == 0) ? 
0 : (sub_bucket_count_ / 2); + for (; j < sub_bucket_count_; j++) { + total_to_current_iJ += CountAt(i, j); + if (total_to_current_iJ >= count_at_percentile) { + uint64_t valueAtIndex = ValueFromIndex(i, j); + return valueAtIndex; + } + } + } + + LOG(DFATAL) << "Fell through while iterating, likely concurrent modification of histogram"; + return 0; +} + +/////////////////////////////////////////////////////////////////////// +// AbstractHistogramIterator +/////////////////////////////////////////////////////////////////////// + +AbstractHistogramIterator::AbstractHistogramIterator(const HdrHistogram* histogram) + : histogram_(CHECK_NOTNULL(histogram)), + cur_iter_val_(), + histogram_total_count_(histogram_->TotalCount()), + current_bucket_index_(0), + current_sub_bucket_index_(0), + current_value_at_index_(0), + next_bucket_index_(0), + next_sub_bucket_index_(1), + next_value_at_index_(1), + prev_value_iterated_to_(0), + total_count_to_prev_index_(0), + total_count_to_current_index_(0), + total_value_to_current_index_(0), + count_at_this_value_(0), + fresh_sub_bucket_(true) { +} + +bool AbstractHistogramIterator::HasNext() const { + return total_count_to_current_index_ < histogram_total_count_; +} + +Status AbstractHistogramIterator::Next(HistogramIterationValue* value) { + if (histogram_->TotalCount() != histogram_total_count_) { + return Status::IllegalState("Concurrently modified histogram while traversing it"); + } + + // Move through the sub buckets and buckets until we hit the next reporting level: + while (!ExhaustedSubBuckets()) { + count_at_this_value_ = + histogram_->CountAt(current_bucket_index_, current_sub_bucket_index_); + if (fresh_sub_bucket_) { // Don't add unless we've incremented since last bucket... + total_count_to_current_index_ += count_at_this_value_; + total_value_to_current_index_ += + count_at_this_value_ * histogram_->MedianEquivalentValue(current_value_at_index_); + fresh_sub_bucket_ = false; + } + if (ReachedIterationLevel()) { + uint64_t value_iterated_to = ValueIteratedTo(); + + // Update iterator value. + cur_iter_val_.value_iterated_to = value_iterated_to; + cur_iter_val_.value_iterated_from = prev_value_iterated_to_; + cur_iter_val_.count_at_value_iterated_to = count_at_this_value_; + cur_iter_val_.count_added_in_this_iteration_step = + (total_count_to_current_index_ - total_count_to_prev_index_); + cur_iter_val_.total_count_to_this_value = total_count_to_current_index_; + cur_iter_val_.total_value_to_this_value = total_value_to_current_index_; + cur_iter_val_.percentile = + ((100.0 * total_count_to_current_index_) / histogram_total_count_); + cur_iter_val_.percentile_level_iterated_to = PercentileIteratedTo(); + + prev_value_iterated_to_ = value_iterated_to; + total_count_to_prev_index_ = total_count_to_current_index_; + // Move the next percentile reporting level forward. 
+ IncrementIterationLevel(); + + *value = cur_iter_val_; + return Status::OK(); + } + IncrementSubBucket(); + } + return Status::IllegalState("Histogram array index out of bounds while traversing"); +} + +double AbstractHistogramIterator::PercentileIteratedTo() const { + return (100.0 * static_cast<double>(total_count_to_current_index_)) / histogram_total_count_; +} + +double AbstractHistogramIterator::PercentileIteratedFrom() const { + return (100.0 * static_cast<double>(total_count_to_prev_index_)) / histogram_total_count_; +} + +uint64_t AbstractHistogramIterator::ValueIteratedTo() const { + return histogram_->HighestEquivalentValue(current_value_at_index_); +} + +bool AbstractHistogramIterator::ExhaustedSubBuckets() const { + return (current_bucket_index_ >= histogram_->bucket_count_); +} + +void AbstractHistogramIterator::IncrementSubBucket() { + fresh_sub_bucket_ = true; + // Take on the next index: + current_bucket_index_ = next_bucket_index_; + current_sub_bucket_index_ = next_sub_bucket_index_; + current_value_at_index_ = next_value_at_index_; + // Figure out the next next index: + next_sub_bucket_index_++; + if (next_sub_bucket_index_ >= histogram_->sub_bucket_count_) { + next_sub_bucket_index_ = histogram_->sub_bucket_half_count_; + next_bucket_index_++; + } + next_value_at_index_ = HdrHistogram::ValueFromIndex(next_bucket_index_, next_sub_bucket_index_); +} + +/////////////////////////////////////////////////////////////////////// +// RecordedValuesIterator +/////////////////////////////////////////////////////////////////////// + +RecordedValuesIterator::RecordedValuesIterator(const HdrHistogram* histogram) + : AbstractHistogramIterator(histogram), + visited_sub_bucket_index_(-1), + visited_bucket_index_(-1) { +} + +void RecordedValuesIterator::IncrementIterationLevel() { + visited_sub_bucket_index_ = current_sub_bucket_index_; + visited_bucket_index_ = current_bucket_index_; +} + +bool RecordedValuesIterator::ReachedIterationLevel() const { + uint64_t current_ij_count = + histogram_->CountAt(current_bucket_index_, current_sub_bucket_index_); + return current_ij_count != 0 && + ((visited_sub_bucket_index_ != current_sub_bucket_index_) || + (visited_bucket_index_ != current_bucket_index_)); +} + +/////////////////////////////////////////////////////////////////////// +// PercentileIterator +/////////////////////////////////////////////////////////////////////// + +PercentileIterator::PercentileIterator(const HdrHistogram* histogram, + int percentile_ticks_per_half_distance) + : AbstractHistogramIterator(histogram), + percentile_ticks_per_half_distance_(percentile_ticks_per_half_distance), + percentile_level_to_iterate_to_(0.0), + percentile_level_to_iterate_from_(0.0), + reached_last_recorded_value_(false) { +} + +bool PercentileIterator::HasNext() const { + if (AbstractHistogramIterator::HasNext()) { + return true; + } + // We want one additional last step to 100% + if (!reached_last_recorded_value_ && (histogram_total_count_ > 0)) { + const_cast<PercentileIterator*>(this)->percentile_level_to_iterate_to_ = 100.0; + const_cast<PercentileIterator*>(this)->reached_last_recorded_value_ = true; + return true; + } + return false; +} + +double PercentileIterator::PercentileIteratedTo() const { + return percentile_level_to_iterate_to_; +} + + +double PercentileIterator::PercentileIteratedFrom() const { + return percentile_level_to_iterate_from_; +} + +void PercentileIterator::IncrementIterationLevel() { + percentile_level_to_iterate_from_ = percentile_level_to_iterate_to_; + // TODO: 
Can this expression be simplified? + uint64_t percentile_reporting_ticks = percentile_ticks_per_half_distance_ * + static_cast<uint64_t>(pow(2.0, + static_cast<int>(log(100.0 / (100.0 - (percentile_level_to_iterate_to_))) / log(2)) + 1)); + percentile_level_to_iterate_to_ += 100.0 / percentile_reporting_ticks; +} + +bool PercentileIterator::ReachedIterationLevel() const { + if (count_at_this_value_ == 0) return false; + double current_percentile = + (100.0 * static_cast<double>(total_count_to_current_index_)) / histogram_total_count_; + return (current_percentile >= percentile_level_to_iterate_to_); +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/hdr_histogram.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hdr_histogram.h b/be/src/kudu/util/hdr_histogram.h new file mode 100644 index 0000000..19e31cc --- /dev/null +++ b/be/src/kudu/util/hdr_histogram.h @@ -0,0 +1,351 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_HDRHISTOGRAM_H_ +#define KUDU_UTIL_HDRHISTOGRAM_H_ + +// C++ (TR1) port of HdrHistogram. +// +// Portions of these classes were ported from Java to C++ from the sources +// available at https://github.com/HdrHistogram/HdrHistogram . +// +// The code in this repository code was Written by Gil Tene, Michael Barker, +// and Matt Warren, and released to the public domain, as explained at +// http://creativecommons.org/publicdomain/zero/1.0/ +// --------------------------------------------------------------------------- +// +// A High Dynamic Range (HDR) Histogram +// +// HdrHistogram supports the recording and analyzing sampled data value counts +// across a configurable integer value range with configurable value precision +// within the range. Value precision is expressed as the number of significant +// digits in the value recording, and provides control over value quantization +// behavior across the value range and the subsequent value resolution at any +// given level. +// +// For example, a Histogram could be configured to track the counts of observed +// integer values between 0 and 3,600,000,000 while maintaining a value +// precision of 3 significant digits across that range. Value quantization +// within the range will thus be no larger than 1/1,000th (or 0.1%) of any +// value. This example Histogram could be used to track and analyze the counts +// of observed response times ranging between 1 microsecond and 1 hour in +// magnitude, while maintaining a value resolution of 1 microsecond up to 1 +// millisecond, a resolution of 1 millisecond (or better) up to one second, and +// a resolution of 1 second (or better) up to 1,000 seconds. 
At it's maximum +// tracked value (1 hour), it would still maintain a resolution of 3.6 seconds +// (or better). + +#include <stdint.h> + +#include "kudu/gutil/atomicops.h" +#include "kudu/gutil/gscoped_ptr.h" + +namespace kudu { + +class AbstractHistogramIterator; +class Status; +class RecordedValuesIterator; + +// This implementation allows you to specify a range and accuracy (significant +// digits) to support in an instance of a histogram. The class takes care of +// the rest. At this time, only uint64_t values are supported. +// +// An HdrHistogram consists of a set of buckets, which bucket the magnitude of +// a value stored, and a set of sub-buckets, which implement the tunable +// precision of the storage. So if you specify 3 significant digits of +// precision, then you will get about 10^3 sub-buckets (as a power of 2) for +// each level of magnitude. Magnitude buckets are tracked in powers of 2. +// +// This class is thread-safe. +class HdrHistogram { + public: + // Specify the highest trackable value so that the class has a bound on the + // number of buckets, and # of significant digits (in decimal) so that the + // class can determine the granularity of those buckets. + HdrHistogram(uint64_t highest_trackable_value, int num_significant_digits); + + // Copy-construct a (non-consistent) snapshot of other. + explicit HdrHistogram(const HdrHistogram& other); + + // Validate your params before trying to construct the object. + static bool IsValidHighestTrackableValue(uint64_t highest_trackable_value); + static bool IsValidNumSignificantDigits(int num_significant_digits); + + // Record new data. + void Increment(int64_t value); + void IncrementBy(int64_t value, int64_t count); + + // Record new data, correcting for "coordinated omission". + // + // See https://groups.google.com/d/msg/mechanical-sympathy/icNZJejUHfE/BfDekfBEs_sJ + // for more details. + void IncrementWithExpectedInterval(int64_t value, + int64_t expected_interval_between_samples); + + // Fetch configuration params. + uint64_t highest_trackable_value() const { return highest_trackable_value_; } + int num_significant_digits() const { return num_significant_digits_; } + + // Get indexes into histogram based on value. + int BucketIndex(uint64_t value) const; + int SubBucketIndex(uint64_t value, int bucket_index) const; + + // Count of all events recorded. + uint64_t TotalCount() const { return base::subtle::NoBarrier_Load(&total_count_); } + + // Sum of all events recorded. + uint64_t TotalSum() const { return base::subtle::NoBarrier_Load(&total_sum_); } + + // Return number of items at index. + uint64_t CountAt(int bucket_index, int sub_bucket_index) const; + + // Return count of values in bucket with values equivalent to value. + uint64_t CountInBucketForValue(uint64_t) const; + + // Return representative value based on index. + static uint64_t ValueFromIndex(int bucket_index, int sub_bucket_index); + + // Get the size (in value units) of the range of values that are equivalent + // to the given value within the histogram's resolution. Where "equivalent" + // means that value samples recorded for any two equivalent values are + // counted in a common total count. + uint64_t SizeOfEquivalentValueRange(uint64_t value) const; + + // Get the lowest value that is equivalent to the given value within the + // histogram's resolution. Where "equivalent" means that value samples + // recorded for any two equivalent values are counted in a common total + // count. 
+ uint64_t LowestEquivalentValue(uint64_t value) const; + + // Get the highest value that is equivalent to the given value within the + // histogram's resolution. + uint64_t HighestEquivalentValue(uint64_t value) const; + + // Get a value that lies in the middle (rounded up) of the range of values + // equivalent the given value. + uint64_t MedianEquivalentValue(uint64_t value) const; + + // Get the next value that is not equivalent to the given value within the + // histogram's resolution. + uint64_t NextNonEquivalentValue(uint64_t value) const; + + // Determine if two values are equivalent with the histogram's resolution. + bool ValuesAreEquivalent(uint64_t value1, uint64_t value2) const; + + // Get the exact minimum value (may lie outside the histogram). + uint64_t MinValue() const; + + // Get the exact maximum value (may lie outside the histogram). + uint64_t MaxValue() const; + + // Get the exact mean value of all recorded values in the histogram. + double MeanValue() const; + + // Get the value at a given percentile. + // This is a percentile in percents, i.e. 99.99 percentile. + uint64_t ValueAtPercentile(double percentile) const; + + // Get the percentile at a given value + // TODO: implement + // double PercentileAtOrBelowValue(uint64_t value) const; + + // Get the count of recorded values within a range of value levels. + // (inclusive to within the histogram's resolution) + // TODO: implement + //uint64_t CountBetweenValues(uint64_t low_value, uint64_t high_value) const; + + private: + friend class AbstractHistogramIterator; + + static const uint64_t kMinHighestTrackableValue = 2; + static const int kMinValidNumSignificantDigits = 1; + static const int kMaxValidNumSignificantDigits = 5; + + void Init(); + int CountsArrayIndex(int bucket_index, int sub_bucket_index) const; + + uint64_t highest_trackable_value_; + int num_significant_digits_; + int counts_array_length_; + int bucket_count_; + int sub_bucket_count_; + + // "Hot" fields in the write path. + uint8_t sub_bucket_half_count_magnitude_; + int sub_bucket_half_count_; + uint32_t sub_bucket_mask_; + + // Also hot. + base::subtle::Atomic64 total_count_; + base::subtle::Atomic64 total_sum_; + base::subtle::Atomic64 min_value_; + base::subtle::Atomic64 max_value_; + gscoped_array<base::subtle::Atomic64> counts_; + + HdrHistogram& operator=(const HdrHistogram& other); // Disable assignment operator. +}; + +// Value returned from iterators. +struct HistogramIterationValue { + HistogramIterationValue() + : value_iterated_to(0), + value_iterated_from(0), + count_at_value_iterated_to(0), + count_added_in_this_iteration_step(0), + total_count_to_this_value(0), + total_value_to_this_value(0), + percentile(0.0), + percentile_level_iterated_to(0.0) { + } + + void Reset() { + value_iterated_to = 0; + value_iterated_from = 0; + count_at_value_iterated_to = 0; + count_added_in_this_iteration_step = 0; + total_count_to_this_value = 0; + total_value_to_this_value = 0; + percentile = 0.0; + percentile_level_iterated_to = 0.0; + } + + uint64_t value_iterated_to; + uint64_t value_iterated_from; + uint64_t count_at_value_iterated_to; + uint64_t count_added_in_this_iteration_step; + uint64_t total_count_to_this_value; + uint64_t total_value_to_this_value; + double percentile; + double percentile_level_iterated_to; +}; + +// Base class for iterating through histogram values. +// +// The underlying histogram must not be modified or destroyed while this class +// is iterating over it. +// +// This class is not thread-safe. 
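As a rough usage sketch (not from the patch), the iterator classes declared below are driven with a HasNext()/Next() loop over HistogramIterationValue; the sample values and the wrapper function are made up for illustration.

#include <glog/logging.h>
#include "kudu/util/hdr_histogram.h"
#include "kudu/util/status.h"

void DumpRecordedValues() {
  kudu::HdrHistogram hist(/*highest_trackable_value=*/1000000, /*num_significant_digits=*/3);
  hist.IncrementBy(10, 80);   // arbitrary sample data
  hist.Increment(100000);

  // The histogram must not be mutated while the iterator is in use.
  kudu::RecordedValuesIterator iter(&hist);
  while (iter.HasNext()) {
    kudu::HistogramIterationValue v;
    kudu::Status s = iter.Next(&v);
    if (!s.ok()) {            // e.g. concurrent modification detected
      LOG(WARNING) << "iteration aborted: " << s.ToString();
      break;
    }
    LOG(INFO) << v.value_iterated_to << " count=" << v.count_at_value_iterated_to;
  }
}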
+class AbstractHistogramIterator { + public: + // Create iterator with new histogram. + // The histogram must not be mutated while the iterator is in use. + explicit AbstractHistogramIterator(const HdrHistogram* histogram); + virtual ~AbstractHistogramIterator() { + } + + // Returns true if the iteration has more elements. + virtual bool HasNext() const; + + // Returns the next element in the iteration. + Status Next(HistogramIterationValue* value); + + virtual double PercentileIteratedTo() const; + virtual double PercentileIteratedFrom() const; + uint64_t ValueIteratedTo() const; + + protected: + // Implementations must override these methods. + virtual void IncrementIterationLevel() = 0; + virtual bool ReachedIterationLevel() const = 0; + + const HdrHistogram* histogram_; + HistogramIterationValue cur_iter_val_; + + uint64_t histogram_total_count_; + + int current_bucket_index_; + int current_sub_bucket_index_; + uint64_t current_value_at_index_; + + int next_bucket_index_; + int next_sub_bucket_index_; + uint64_t next_value_at_index_; + + uint64_t prev_value_iterated_to_; + uint64_t total_count_to_prev_index_; + + uint64_t total_count_to_current_index_; + uint64_t total_value_to_current_index_; + + uint64_t count_at_this_value_; + + private: + bool ExhaustedSubBuckets() const; + void IncrementSubBucket(); + + bool fresh_sub_bucket_; + + DISALLOW_COPY_AND_ASSIGN(AbstractHistogramIterator); +}; + +// Used for iterating through all recorded histogram values using the finest +// granularity steps supported by the underlying representation. The iteration +// steps through all non-zero recorded value counts, and terminates when all +// recorded histogram values are exhausted. +// +// The underlying histogram must not be modified or destroyed while this class +// is iterating over it. +// +// This class is not thread-safe. +class RecordedValuesIterator : public AbstractHistogramIterator { + public: + explicit RecordedValuesIterator(const HdrHistogram* histogram); + + protected: + virtual void IncrementIterationLevel() OVERRIDE; + virtual bool ReachedIterationLevel() const OVERRIDE; + + private: + int visited_sub_bucket_index_; + int visited_bucket_index_; + + DISALLOW_COPY_AND_ASSIGN(RecordedValuesIterator); +}; + +// Used for iterating through histogram values according to percentile levels. +// The iteration is performed in steps that start at 0% and reduce their +// distance to 100% according to the percentileTicksPerHalfDistance parameter, +// ultimately reaching 100% when all recorded histogram values are exhausted. +// +// The underlying histogram must not be modified or destroyed while this class +// is iterating over it. +// +// This class is not thread-safe. +class PercentileIterator : public AbstractHistogramIterator { + public: + // TODO: Explain percentile_ticks_per_half_distance. 
+ PercentileIterator(const HdrHistogram* histogram, + int percentile_ticks_per_half_distance); + virtual bool HasNext() const OVERRIDE; + virtual double PercentileIteratedTo() const OVERRIDE; + virtual double PercentileIteratedFrom() const OVERRIDE; + + protected: + virtual void IncrementIterationLevel() OVERRIDE; + virtual bool ReachedIterationLevel() const OVERRIDE; + + private: + int percentile_ticks_per_half_distance_; + double percentile_level_to_iterate_to_; + double percentile_level_to_iterate_from_; + bool reached_last_recorded_value_; + + DISALLOW_COPY_AND_ASSIGN(PercentileIterator); +}; + +} // namespace kudu + +#endif // KUDU_UTIL_HDRHISTOGRAM_H_ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/hexdump.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hexdump.cc b/be/src/kudu/util/hexdump.cc new file mode 100644 index 0000000..8f7c6cd --- /dev/null +++ b/be/src/kudu/util/hexdump.cc @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/util/hexdump.h" + +#include <algorithm> +#include <string> + +#include "kudu/gutil/stringprintf.h" +#include "kudu/util/logging.h" +#include "kudu/util/slice.h" + +namespace kudu { + +std::string HexDump(const Slice &slice) { + if (KUDU_SHOULD_REDACT()) { + return kRedactionMessage; + } + + std::string output; + output.reserve(slice.size() * 5); + + const uint8_t *p = slice.data(); + + int rem = slice.size(); + while (rem > 0) { + const uint8_t *line_p = p; + int line_len = std::min(rem, 16); + int line_rem = line_len; + StringAppendF(&output, "%06lx: ", line_p - slice.data()); + + while (line_rem >= 2) { + StringAppendF(&output, "%02x%02x ", + p[0] & 0xff, p[1] & 0xff); + p += 2; + line_rem -= 2; + } + + if (line_rem == 1) { + StringAppendF(&output, "%02x ", + p[0] & 0xff); + p += 1; + line_rem -= 1; + } + + int padding = (16 - line_len) / 2; + + for (int i = 0; i < padding; i++) { + output.append(" "); + } + + for (int i = 0; i < line_len; i++) { + char c = line_p[i]; + if (isprint(c)) { + output.push_back(c); + } else { + output.push_back('.'); + } + } + + output.push_back('\n'); + rem -= line_len; + } + return output; +} +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/hexdump.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hexdump.h b/be/src/kudu/util/hexdump.h new file mode 100644 index 0000000..eacfad2 --- /dev/null +++ b/be/src/kudu/util/hexdump.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_HEXDUMP_H +#define KUDU_UTIL_HEXDUMP_H + +#include <string> + +namespace kudu { + +class Slice; + +// Generate an 'xxd'-style hexdump of the given slice. This should only be used +// for debugging, as the format is subject to change and it has not been +// implemented for speed. +// +// The returned string will be redacted if redaction is enabled. +std::string HexDump(const Slice &slice); + +} // namespace kudu +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/high_water_mark.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/high_water_mark.h b/be/src/kudu/util/high_water_mark.h new file mode 100644 index 0000000..dfc30e4 --- /dev/null +++ b/be/src/kudu/util/high_water_mark.h @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_HIGH_WATER_MARK_H +#define KUDU_UTIL_HIGH_WATER_MARK_H + +#include "kudu/gutil/macros.h" +#include "kudu/util/atomic.h" + +namespace kudu { + +// Lock-free integer that keeps track of the highest value seen. +// Similar to Impala's RuntimeProfile::HighWaterMarkCounter. +// HighWaterMark::max_value() returns the highest value seen; +// HighWaterMark::current_value() returns the current value. +class HighWaterMark { + public: + explicit HighWaterMark(int64_t initial_value) + : current_value_(initial_value), + max_value_(initial_value) { + } + + // Return the current value. + int64_t current_value() const { + return current_value_.Load(kMemOrderNoBarrier); + } + + // Return the max value. + int64_t max_value() const { + return max_value_.Load(kMemOrderNoBarrier); + } + + // If current value + 'delta' is <= 'max', increment current value + // by 'delta' and return true; return false otherwise. 
+ bool TryIncrementBy(int64_t delta, int64_t max) { + while (true) { + int64_t old_val = current_value(); + int64_t new_val = old_val + delta; + if (new_val > max) { + return false; + } + if (PREDICT_TRUE(current_value_.CompareAndSet(old_val, + new_val, + kMemOrderNoBarrier))) { + UpdateMax(new_val); + return true; + } + } + } + + void IncrementBy(int64_t amount) { + UpdateMax(current_value_.IncrementBy(amount, kMemOrderNoBarrier)); + } + + void set_value(int64_t v) { + current_value_.Store(v, kMemOrderNoBarrier); + UpdateMax(v); + } + + private: + void UpdateMax(int64_t value) { + max_value_.StoreMax(value, kMemOrderNoBarrier); + } + + AtomicInt<int64_t> current_value_; + AtomicInt<int64_t> max_value_; +}; + +} // namespace kudu +#endif /* KUDU_UTIL_HIGH_WATER_MARK_H */ + + http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/histogram.proto ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/histogram.proto b/be/src/kudu/util/histogram.proto new file mode 100644 index 0000000..e4526e7 --- /dev/null +++ b/be/src/kudu/util/histogram.proto @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +syntax = "proto2"; +package kudu; + +option java_package = "org.apache.kudu"; + +// Captures the state of an Histogram. +message HistogramSnapshotPB { + required string type = 1; + required string name = 2; + optional string description = 3; + required string unit = 4; + optional string label = 19; + + required uint64 max_trackable_value = 5; + required int32 num_significant_digits = 6; + required uint64 total_count = 7; + optional uint64 total_sum = 18; + required uint64 min = 8; + required double mean = 9; + required uint64 percentile_75 = 10; + required uint64 percentile_95 = 11; + required uint64 percentile_99 = 12; + required uint64 percentile_99_9 = 13; + required uint64 percentile_99_99 = 14; + required uint64 max = 15; + repeated uint64 values = 16 [packed = true]; + repeated uint64 counts = 17 [packed = true]; +} + +message HistogramSnapshotsListPB { + repeated HistogramSnapshotPB histograms = 1; +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/init.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/init.cc b/be/src/kudu/util/init.cc new file mode 100644 index 0000000..3fa634a --- /dev/null +++ b/be/src/kudu/util/init.cc @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/util/init.h" + +#include <fcntl.h> +#include <unistd.h> + +#include <string> + +#include "kudu/gutil/cpu.h" +#include "kudu/gutil/strings/substitute.h" +#include "kudu/util/status.h" + +using std::string; + +namespace kudu { + +Status BadCPUStatus(const base::CPU& cpu, const char* instruction_set) { + return Status::NotSupported(strings::Substitute( + "The CPU on this system ($0) does not support the $1 instruction " + "set which is required for running Kudu. If you are running inside a VM, " + "you may need to enable SSE4.2 pass-through.", + cpu.cpu_brand(), instruction_set)); +} + +bool IsFdOpen(int fd) { + return fcntl(fd, F_GETFL) != -1; +} + +// Checks that the standard file descriptors are open when the process +// starts. +// +// If these descriptors aren't open, we can run into serious issues: +// we later might open some other files which end up reusing the same +// file descriptor numbers as stderr, and then some library like glog +// may decide to write a log message to what it thinks is stderr. That +// would then overwrite one of our important data files and cause +// corruption! +void CheckStandardFds() { + if (!IsFdOpen(STDIN_FILENO) || + !IsFdOpen(STDOUT_FILENO) || + !IsFdOpen(STDERR_FILENO)) { + // We can't use LOG(FATAL) here because glog isn't initialized yet, and even if it + // were, it would try to write to stderr, which might end up writing the log message + // into some unexpected place. This is a rare enough issue that people can deal with + // the core dump. + abort(); + } +} + +Status CheckCPUFlags() { + base::CPU cpu; + if (!cpu.has_sse42()) { + return BadCPUStatus(cpu, "SSE4.2"); + } + + if (!cpu.has_ssse3()) { + return BadCPUStatus(cpu, "SSSE3"); + } + + return Status::OK(); +} + +void InitKuduOrDie() { + CheckStandardFds(); + CHECK_OK(CheckCPUFlags()); + // NOTE: this function is called before flags are parsed. + // Do not add anything in here which is flag-dependent. +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/init.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/init.h b/be/src/kudu/util/init.h new file mode 100644 index 0000000..3f7916c --- /dev/null +++ b/be/src/kudu/util/init.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_INIT_H +#define KUDU_UTIL_INIT_H + +#include "kudu/gutil/macros.h" +#include "kudu/util/status.h" + +namespace kudu { + +// Return a NotSupported Status if the current CPU does not support the CPU flags +// required for Kudu. +Status CheckCPUFlags(); + +// Initialize Kudu, checking that the platform we are running on is supported, etc. +// Issues a FATAL log message if we fail to init. +void InitKuduOrDie(); + +} // namespace kudu +#endif /* KUDU_UTIL_INIT_H */ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/inline_slice-test.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/inline_slice-test.cc b/be/src/kudu/util/inline_slice-test.cc new file mode 100644 index 0000000..df69028 --- /dev/null +++ b/be/src/kudu/util/inline_slice-test.cc @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> +#include <vector> + +#include "kudu/gutil/gscoped_ptr.h" +#include "kudu/util/inline_slice.h" +#include "kudu/util/memory/arena.h" + +namespace kudu { + +template<size_t N> +static void TestRoundTrip(InlineSlice<N> *slice, + Arena *arena, + size_t test_size) { + gscoped_ptr<uint8_t[]> buf(new uint8_t[test_size]); + for (int i = 0; i < test_size; i++) { + buf[i] = i & 0xff; + } + + Slice test_input(buf.get(), test_size); + + slice->set(test_input, arena); + Slice ret = slice->as_slice(); + ASSERT_TRUE(ret == test_input) + << "test_size =" << test_size << "\n" + << "ret = " << ret.ToDebugString() << "\n" + << "test_input = " << test_input.ToDebugString(); + + // If the data is small enough to fit inline, then + // the returned slice should point directly into the + // InlineSlice object. 
+ if (test_size < N) { + ASSERT_EQ(reinterpret_cast<const uint8_t *>(slice) + 1, + ret.data()); + } +} + +// Sweep a variety of inputs for a given size of inline +// data +template<size_t N> +static void DoTest() { + Arena arena(1024, 4096); + + // Test a range of inputs both growing and shrinking + InlineSlice<N> my_slice; + ASSERT_EQ(N, sizeof(my_slice)); + + for (size_t to_test = 0; to_test < 1000; to_test++) { + TestRoundTrip(&my_slice, &arena, to_test); + } + for (size_t to_test = 1000; to_test > 0; to_test--) { + TestRoundTrip(&my_slice, &arena, to_test); + } +} + +TEST(TestInlineSlice, Test8ByteInline) { + DoTest<8>(); +} + +TEST(TestInlineSlice, Test12ByteInline) { + DoTest<12>(); +} + +TEST(TestInlineSlice, Test16ByteInline) { + DoTest<16>(); +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d6abb29d/be/src/kudu/util/inline_slice.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/inline_slice.h b/be/src/kudu/util/inline_slice.h new file mode 100644 index 0000000..692617b --- /dev/null +++ b/be/src/kudu/util/inline_slice.h @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_INLINE_SLICE_H +#define KUDU_UTIL_INLINE_SLICE_H + +#include "kudu/gutil/atomicops.h" +#include "kudu/gutil/casts.h" +#include "kudu/util/memory/arena.h" + +namespace kudu { + +#if __BYTE_ORDER != __LITTLE_ENDIAN +#error This needs to be ported for big endian +#endif + +// Class which represents short strings inline, and stores longer ones +// by instead storing a pointer. +// +// Internal format: +// The buffer must be at least as large as a pointer (eg 8 bytes for 64-bit). +// Let ptr = bit-casting the first 8 bytes as a pointer: +// If buf_[0] < 0xff: +// buf_[0] == length of stored data +// buf_[1..1 + buf_[0]] == inline data +// If buf_[0] == 0xff: +// buf_[1..sizeof(uint8_t *)] == pointer to indirect data, minus the MSB. +// buf_[sizeof(uint8_t *)..] = unused +// TODO: we could store a prefix of the indirect data in this unused space +// in the future, which might be able to short-circuit some comparisons +// +// The indirect data which is pointed to is stored as a 4 byte length followed by +// the actual data. +// +// This class relies on the fact that the most significant bit of any x86 pointer is +// 0 (i.e pointers only use the bottom 48 bits) +// +// If ATOMIC is true, then this class has the semantics that readers will never see +// invalid pointers, even in the case of concurrent access. However, they _may_ see +// invalid *data*. That is to say, calling 'as_slice()' will always return a slice +// which points to a valid memory region -- the memory region may contain garbage +// but will not cause a segfault on access. 
+// +// These ATOMIC semantics may seem too loose to be useful, but can be used in +// optimistic concurrency control schemes -- so long as accessing the slice doesn't +// produce a segfault, it's OK to read bad data on a race because the higher-level +// concurrency control will cause a retry. +template<size_t STORAGE_SIZE, bool ATOMIC = false> +class InlineSlice { + private: + enum { + kPointerByteWidth = sizeof(uintptr_t), + kPointerBitWidth = kPointerByteWidth * 8, + kMaxInlineData = STORAGE_SIZE - 1 + }; + + static_assert(STORAGE_SIZE >= kPointerByteWidth, + "InlineSlice storage size must be greater than the width of a pointer"); + static_assert(STORAGE_SIZE <= 256, + "InlineSlice storage size must be less than 256 bytes"); + public: + InlineSlice() { + } + + inline const Slice as_slice() const ATTRIBUTE_ALWAYS_INLINE { + DiscriminatedPointer dptr = LoadValue(); + + if (dptr.is_indirect()) { + const uint8_t *indir_data = reinterpret_cast<const uint8_t *>(dptr.pointer); + uint32_t len = *reinterpret_cast<const uint32_t *>(indir_data); + indir_data += sizeof(uint32_t); + return Slice(indir_data, (size_t)len); + } else { + uint8_t len = dptr.discriminator; + DCHECK_LE(len, STORAGE_SIZE - 1); + return Slice(&buf_[1], len); + } + } + + template<class ArenaType> + void set(const Slice &src, ArenaType *alloc_arena) { + set(src.data(), src.size(), alloc_arena); + } + + template<class ArenaType> + void set(const uint8_t *src, size_t len, + ArenaType *alloc_arena) { + if (len <= kMaxInlineData) { + if (ATOMIC) { + // If atomic, we need to make sure that we store the discriminator + // before we copy in any data. Otherwise the data would overwrite + // part of a pointer and a reader might see an invalid address. + DiscriminatedPointer dptr; + dptr.discriminator = len; + dptr.pointer = 0; // will be overwritten + // "Acquire" ensures that the later memcpy doesn't reorder above the + // set of the discriminator bit. + base::subtle::Acquire_Store(reinterpret_cast<volatile AtomicWord *>(buf_), + bit_cast<uintptr_t>(dptr)); + } else { + buf_[0] = len; + } + memcpy(&buf_[1], src, len); + + } else { + // TODO: if already indirect and the current storage has enough space, just reuse that. + + // Set up the pointed-to data before setting a pointer to it. This ensures that readers + // never see a pointer to an invalid region (i.e one without a proper length header). + void *in_arena = CHECK_NOTNULL(alloc_arena->AllocateBytes(len + sizeof(uint32_t))); + *reinterpret_cast<uint32_t *>(in_arena) = len; + memcpy(reinterpret_cast<uint8_t *>(in_arena) + sizeof(uint32_t), src, len); + set_ptr(in_arena); + } + } + + private: + struct DiscriminatedPointer { + uint8_t discriminator : 8; + uintptr_t pointer : 54; + + bool is_indirect() const { + return discriminator == 0xff; + } + }; + + DiscriminatedPointer LoadValue() const { + if (ATOMIC) { + // Load with "Acquire" semantics -- if we load a pointer, this ensures + // that we also see the pointed-to data. + uintptr_t ptr_val = base::subtle::Acquire_Load( + reinterpret_cast<volatile const AtomicWord *>(buf_)); + return bit_cast<DiscriminatedPointer>(ptr_val); + } else { + DiscriminatedPointer ret; + memcpy(&ret, buf_, sizeof(ret)); + return ret; + } + } + + // Set the internal storage to be an indirect pointer to the given + // address. 
+ void set_ptr(void *ptr) { + uintptr_t ptr_int = reinterpret_cast<uintptr_t>(ptr); + DCHECK_EQ(ptr_int >> (kPointerBitWidth - 8), 0) << + "bad pointer (should have 0x00 MSB): " << ptr; + + DiscriminatedPointer dptr; + dptr.discriminator = 0xff; + dptr.pointer = ptr_int; + + if (ATOMIC) { + // Store with "Release" semantics -- this ensures that the pointed-to data + // is visible to any readers who see this pointer. + uintptr_t to_store = bit_cast<uintptr_t>(dptr); + base::subtle::Release_Store(reinterpret_cast<volatile AtomicWord *>(buf_), + to_store); + } else { + memcpy(&buf_[0], &dptr, sizeof(dptr)); + } + } + + uint8_t buf_[STORAGE_SIZE]; + +} PACKED; + +} // namespace kudu + +#endif
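A short usage sketch for the InlineSlice class defined above. This is illustrative only and not part of the change itself: the function name InlineSliceExample is made up, and the snippet assumes it is compiled inside the Kudu source tree so that the included headers resolve.

#include <cstring>
#include <glog/logging.h>

#include "kudu/util/inline_slice.h"
#include "kudu/util/memory/arena.h"
#include "kudu/util/slice.h"

namespace kudu {

static void InlineSliceExample() {
  Arena arena(1024, 4096);   // backs the out-of-line (indirect) copies
  InlineSlice<16> s;         // 16 bytes of storage => at most 15 bytes stored inline

  // A short value fits in the object's own buffer; no arena allocation happens.
  const char* kShort = "inline";
  s.set(reinterpret_cast<const uint8_t*>(kShort), strlen(kShort), &arena);
  CHECK(s.as_slice() == Slice(reinterpret_cast<const uint8_t*>(kShort), strlen(kShort)));

  // A longer value is copied into the arena and referenced through the
  // discriminated pointer described in the header comment above.
  const char* kLong = "this value is far too long to be stored inline in 15 bytes";
  s.set(reinterpret_cast<const uint8_t*>(kLong), strlen(kLong), &arena);
  CHECK(s.as_slice() == Slice(reinterpret_cast<const uint8_t*>(kLong), strlen(kLong)));
}

}  // namespace kudu

Note that InlineSlice never frees the indirect copies it allocates, so the arena must outlive both the slice and any Slice returned by as_slice(); the inline_slice-test.cc above follows the same pattern by creating the Arena alongside the slice under test.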

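Relatedly, the capped compare-and-swap loop in HighWaterMark::TryIncrementBy() (be/src/kudu/util/high_water_mark.h above) can be sketched in isolation with std::atomic. The CappedCounter class below is a made-up illustration, not Kudu code; it omits the max-value tracking that HighWaterMark also performs and only shows the retry-until-CAS-succeeds pattern.

#include <atomic>
#include <cstdint>

// Illustrative stand-in for HighWaterMark's capped increment; not Kudu code.
class CappedCounter {
 public:
  explicit CappedCounter(int64_t initial) : value_(initial) {}

  // Add 'delta' only if the result stays <= 'max'. Retries when another
  // thread changes the value between the load and the compare-and-swap.
  bool TryIncrementBy(int64_t delta, int64_t max) {
    int64_t old_val = value_.load(std::memory_order_relaxed);
    while (true) {
      int64_t new_val = old_val + delta;
      if (new_val > max) {
        return false;
      }
      // On failure, compare_exchange_weak reloads 'old_val' with the value
      // it actually found, so the loop re-evaluates the cap before retrying.
      if (value_.compare_exchange_weak(old_val, new_val,
                                       std::memory_order_relaxed)) {
        return true;
      }
    }
  }

  int64_t current_value() const { return value_.load(std::memory_order_relaxed); }

 private:
  std::atomic<int64_t> value_;
};

HighWaterMark itself layers UpdateMax() on top of this loop, so max_value() never decreases even when current_value() does.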