http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hdr_histogram.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hdr_histogram.cc b/be/src/kudu/util/hdr_histogram.cc new file mode 100644 index 0000000..4907444 --- /dev/null +++ b/be/src/kudu/util/hdr_histogram.cc @@ -0,0 +1,501 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +// Portions of these classes were ported from Java to C++ from the sources +// available at https://github.com/HdrHistogram/HdrHistogram . 
+// +// The code in this repository code was Written by Gil Tene, Michael Barker, +// and Matt Warren, and released to the public domain, as explained at +// http://creativecommons.org/publicdomain/zero/1.0/ +#include "kudu/util/hdr_histogram.h" + +#include <algorithm> +#include <cmath> +#include <limits> +#include <ostream> +#include <string> + +#include <glog/logging.h> + +#include "kudu/gutil/atomicops.h" +#include "kudu/gutil/bits.h" +#include "kudu/gutil/strings/substitute.h" +#include "kudu/util/status.h" + +using base::subtle::Atomic64; +using base::subtle::NoBarrier_AtomicIncrement; +using base::subtle::NoBarrier_Store; +using base::subtle::NoBarrier_Load; +using base::subtle::NoBarrier_CompareAndSwap; +using strings::Substitute; + +namespace kudu { + +HdrHistogram::HdrHistogram(uint64_t highest_trackable_value, int num_significant_digits) + : highest_trackable_value_(highest_trackable_value), + num_significant_digits_(num_significant_digits), + counts_array_length_(0), + bucket_count_(0), + sub_bucket_count_(0), + sub_bucket_half_count_magnitude_(0), + sub_bucket_half_count_(0), + sub_bucket_mask_(0), + total_count_(0), + total_sum_(0), + min_value_(std::numeric_limits<Atomic64>::max()), + max_value_(0), + counts_(nullptr) { + Init(); +} + +HdrHistogram::HdrHistogram(const HdrHistogram& other) + : highest_trackable_value_(other.highest_trackable_value_), + num_significant_digits_(other.num_significant_digits_), + counts_array_length_(0), + bucket_count_(0), + sub_bucket_count_(0), + sub_bucket_half_count_magnitude_(0), + sub_bucket_half_count_(0), + sub_bucket_mask_(0), + total_count_(0), + total_sum_(0), + min_value_(std::numeric_limits<Atomic64>::max()), + max_value_(0), + counts_(nullptr) { + Init(); + + // Not a consistent snapshot but we try to roughly keep it close. + // Copy the sum and min first. 
+ NoBarrier_Store(&total_sum_, NoBarrier_Load(&other.total_sum_)); + NoBarrier_Store(&min_value_, NoBarrier_Load(&other.min_value_)); + + uint64_t total_copied_count = 0; + // Copy the counts in order of ascending magnitude. + for (int i = 0; i < counts_array_length_; i++) { + uint64_t count = NoBarrier_Load(&other.counts_[i]); + NoBarrier_Store(&counts_[i], count); + total_copied_count += count; + } + // Copy the max observed value last. + NoBarrier_Store(&max_value_, NoBarrier_Load(&other.max_value_)); + // We must ensure the total is consistent with the copied counts. + NoBarrier_Store(&total_count_, total_copied_count); +} + +bool HdrHistogram::IsValidHighestTrackableValue(uint64_t highest_trackable_value) { + return highest_trackable_value >= kMinHighestTrackableValue; +} + +bool HdrHistogram::IsValidNumSignificantDigits(int num_significant_digits) { + return num_significant_digits >= kMinValidNumSignificantDigits && + num_significant_digits <= kMaxValidNumSignificantDigits; +} + +void HdrHistogram::Init() { + // Verify parameter validity + CHECK(IsValidHighestTrackableValue(highest_trackable_value_)) << + Substitute("highest_trackable_value must be >= $0", kMinHighestTrackableValue); + CHECK(IsValidNumSignificantDigits(num_significant_digits_)) << + Substitute("num_significant_digits must be between $0 and $1", + kMinValidNumSignificantDigits, kMaxValidNumSignificantDigits); + + uint32_t largest_value_with_single_unit_resolution = + 2 * static_cast<uint32_t>(pow(10.0, num_significant_digits_)); + + // We need to maintain power-of-two sub_bucket_count_ (for clean direct + // indexing) that is large enough to provide unit resolution to at least + // largest_value_with_single_unit_resolution. So figure out + // largest_value_with_single_unit_resolution's nearest power-of-two + // (rounded up), and use that: + + // The sub-buckets take care of the precision. + // Each sub-bucket is sized to have enough bits for the requested + // 10^precision accuracy. 
+ int sub_bucket_count_magnitude = + Bits::Log2Ceiling(largest_value_with_single_unit_resolution); + sub_bucket_half_count_magnitude_ = + (sub_bucket_count_magnitude >= 1) ? sub_bucket_count_magnitude - 1 : 0; + + // sub_bucket_count_ is approx. 10^num_sig_digits (as a power of 2) + sub_bucket_count_ = pow(2.0, sub_bucket_half_count_magnitude_ + 1); + sub_bucket_mask_ = sub_bucket_count_ - 1; + sub_bucket_half_count_ = sub_bucket_count_ / 2; + + // The buckets take care of the magnitude. + // Determine exponent range needed to support the trackable value with no + // overflow: + uint64_t trackable_value = sub_bucket_count_ - 1; + int buckets_needed = 1; + while (trackable_value < highest_trackable_value_) { + trackable_value <<= 1; + buckets_needed++; + } + bucket_count_ = buckets_needed; + + counts_array_length_ = (bucket_count_ + 1) * sub_bucket_half_count_; + counts_.reset(new Atomic64[counts_array_length_]()); // value-initialized +} + +void HdrHistogram::Increment(int64_t value) { + IncrementBy(value, 1); +} + +void HdrHistogram::IncrementBy(int64_t value, int64_t count) { + DCHECK_GE(value, 0); + DCHECK_GE(count, 0); + + // Dissect the value into bucket and sub-bucket parts, and derive index into + // counts array: + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + int counts_index = CountsArrayIndex(bucket_index, sub_bucket_index); + + // Increment bucket, total, and sum. + NoBarrier_AtomicIncrement(&counts_[counts_index], count); + NoBarrier_AtomicIncrement(&total_count_, count); + NoBarrier_AtomicIncrement(&total_sum_, value * count); + + // Update min, if needed. + { + Atomic64 min_val; + while (PREDICT_FALSE(value < (min_val = MinValue()))) { + Atomic64 old_val = NoBarrier_CompareAndSwap(&min_value_, min_val, value); + if (PREDICT_TRUE(old_val == min_val)) break; // CAS success. + } + } + + // Update max, if needed. 
+ { + Atomic64 max_val; + while (PREDICT_FALSE(value > (max_val = MaxValue()))) { + Atomic64 old_val = NoBarrier_CompareAndSwap(&max_value_, max_val, value); + if (PREDICT_TRUE(old_val == max_val)) break; // CAS success. + } + } +} + +void HdrHistogram::IncrementWithExpectedInterval(int64_t value, + int64_t expected_interval_between_samples) { + Increment(value); + if (expected_interval_between_samples <= 0) { + return; + } + for (int64_t missing_value = value - expected_interval_between_samples; + missing_value >= expected_interval_between_samples; + missing_value -= expected_interval_between_samples) { + Increment(missing_value); + } +} + +//////////////////////////////////// + +int HdrHistogram::BucketIndex(uint64_t value) const { + if (PREDICT_FALSE(value > highest_trackable_value_)) { + value = highest_trackable_value_; + } + // Here we are calculating the power-of-2 magnitude of the value with a + // correction for precision in the first bucket. + // Smallest power of 2 containing value. + int pow2ceiling = Bits::Log2Ceiling64(value | sub_bucket_mask_); + return pow2ceiling - (sub_bucket_half_count_magnitude_ + 1); +} + +int HdrHistogram::SubBucketIndex(uint64_t value, int bucket_index) const { + if (PREDICT_FALSE(value > highest_trackable_value_)) { + value = highest_trackable_value_; + } + // We hack off the magnitude and are left with only the relevant precision + // portion, which gives us a direct index into the sub-bucket. TODO: Right?? 
+ return static_cast<int>(value >> bucket_index); +} + +int HdrHistogram::CountsArrayIndex(int bucket_index, int sub_bucket_index) const { + DCHECK(sub_bucket_index < sub_bucket_count_); + DCHECK(bucket_index < bucket_count_); + DCHECK(bucket_index == 0 || (sub_bucket_index >= sub_bucket_half_count_)); + // Calculate the index for the first entry in the bucket: + // (The following is the equivalent of ((bucket_index + 1) * sub_bucket_half_count_) ): + int bucket_base_index = (bucket_index + 1) << sub_bucket_half_count_magnitude_; + // Calculate the offset in the bucket: + int offset_in_bucket = sub_bucket_index - sub_bucket_half_count_; + return bucket_base_index + offset_in_bucket; +} + +uint64_t HdrHistogram::CountAt(int bucket_index, int sub_bucket_index) const { + return counts_[CountsArrayIndex(bucket_index, sub_bucket_index)]; +} + +uint64_t HdrHistogram::CountInBucketForValue(uint64_t value) const { + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + return CountAt(bucket_index, sub_bucket_index); +} + +uint64_t HdrHistogram::ValueFromIndex(int bucket_index, int sub_bucket_index) { + return static_cast<uint64_t>(sub_bucket_index) << bucket_index; +} + +//////////////////////////////////// + +uint64_t HdrHistogram::SizeOfEquivalentValueRange(uint64_t value) const { + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + uint64_t distance_to_next_value = + (1 << ((sub_bucket_index >= sub_bucket_count_) ? 
(bucket_index + 1) : bucket_index)); + return distance_to_next_value; +} + +uint64_t HdrHistogram::LowestEquivalentValue(uint64_t value) const { + int bucket_index = BucketIndex(value); + int sub_bucket_index = SubBucketIndex(value, bucket_index); + uint64_t this_value_base_level = ValueFromIndex(bucket_index, sub_bucket_index); + return this_value_base_level; +} + +uint64_t HdrHistogram::HighestEquivalentValue(uint64_t value) const { + return NextNonEquivalentValue(value) - 1; +} + +uint64_t HdrHistogram::MedianEquivalentValue(uint64_t value) const { + return (LowestEquivalentValue(value) + (SizeOfEquivalentValueRange(value) >> 1)); +} + +uint64_t HdrHistogram::NextNonEquivalentValue(uint64_t value) const { + return LowestEquivalentValue(value) + SizeOfEquivalentValueRange(value); +} + +bool HdrHistogram::ValuesAreEquivalent(uint64_t value1, uint64_t value2) const { + return (LowestEquivalentValue(value1) == LowestEquivalentValue(value2)); +} + +uint64_t HdrHistogram::MinValue() const { + if (PREDICT_FALSE(TotalCount() == 0)) return 0; + return NoBarrier_Load(&min_value_); +} + +uint64_t HdrHistogram::MaxValue() const { + if (PREDICT_FALSE(TotalCount() == 0)) return 0; + return NoBarrier_Load(&max_value_); +} + +double HdrHistogram::MeanValue() const { + uint64_t count = TotalCount(); + if (PREDICT_FALSE(count == 0)) return 0.0; + return static_cast<double>(TotalSum()) / count; +} + +uint64_t HdrHistogram::ValueAtPercentile(double percentile) const { + uint64_t count = TotalCount(); + if (PREDICT_FALSE(count == 0)) return 0; + + double requested_percentile = std::min(percentile, 100.0); // Truncate down to 100% + uint64_t count_at_percentile = static_cast<uint64_t>( + ((requested_percentile / 100.0) * count) + 0.5); // NOLINT(misc-incorrect-roundings) + // Make sure we at least reach the first recorded entry + count_at_percentile = std::max(count_at_percentile, static_cast<uint64_t>(1)); + + uint64_t total_to_current_iJ = 0; + for (int i = 0; i < bucket_count_; 
i++) { + int j = (i == 0) ? 0 : (sub_bucket_count_ / 2); + for (; j < sub_bucket_count_; j++) { + total_to_current_iJ += CountAt(i, j); + if (total_to_current_iJ >= count_at_percentile) { + uint64_t valueAtIndex = ValueFromIndex(i, j); + return valueAtIndex; + } + } + } + + LOG(DFATAL) << "Fell through while iterating, likely concurrent modification of histogram"; + return 0; +} + +/////////////////////////////////////////////////////////////////////// +// AbstractHistogramIterator +/////////////////////////////////////////////////////////////////////// + +AbstractHistogramIterator::AbstractHistogramIterator(const HdrHistogram* histogram) + : histogram_(CHECK_NOTNULL(histogram)), + cur_iter_val_(), + histogram_total_count_(histogram_->TotalCount()), + current_bucket_index_(0), + current_sub_bucket_index_(0), + current_value_at_index_(0), + next_bucket_index_(0), + next_sub_bucket_index_(1), + next_value_at_index_(1), + prev_value_iterated_to_(0), + total_count_to_prev_index_(0), + total_count_to_current_index_(0), + total_value_to_current_index_(0), + count_at_this_value_(0), + fresh_sub_bucket_(true) { +} + +bool AbstractHistogramIterator::HasNext() const { + return total_count_to_current_index_ < histogram_total_count_; +} + +Status AbstractHistogramIterator::Next(HistogramIterationValue* value) { + if (histogram_->TotalCount() != histogram_total_count_) { + return Status::IllegalState("Concurrently modified histogram while traversing it"); + } + + // Move through the sub buckets and buckets until we hit the next reporting level: + while (!ExhaustedSubBuckets()) { + count_at_this_value_ = + histogram_->CountAt(current_bucket_index_, current_sub_bucket_index_); + if (fresh_sub_bucket_) { // Don't add unless we've incremented since last bucket... 
+ total_count_to_current_index_ += count_at_this_value_; + total_value_to_current_index_ += + count_at_this_value_ * histogram_->MedianEquivalentValue(current_value_at_index_); + fresh_sub_bucket_ = false; + } + if (ReachedIterationLevel()) { + uint64_t value_iterated_to = ValueIteratedTo(); + + // Update iterator value. + cur_iter_val_.value_iterated_to = value_iterated_to; + cur_iter_val_.value_iterated_from = prev_value_iterated_to_; + cur_iter_val_.count_at_value_iterated_to = count_at_this_value_; + cur_iter_val_.count_added_in_this_iteration_step = + (total_count_to_current_index_ - total_count_to_prev_index_); + cur_iter_val_.total_count_to_this_value = total_count_to_current_index_; + cur_iter_val_.total_value_to_this_value = total_value_to_current_index_; + cur_iter_val_.percentile = + ((100.0 * total_count_to_current_index_) / histogram_total_count_); + cur_iter_val_.percentile_level_iterated_to = PercentileIteratedTo(); + + prev_value_iterated_to_ = value_iterated_to; + total_count_to_prev_index_ = total_count_to_current_index_; + // Move the next percentile reporting level forward. 
+ IncrementIterationLevel(); + + *value = cur_iter_val_; + return Status::OK(); + } + IncrementSubBucket(); + } + return Status::IllegalState("Histogram array index out of bounds while traversing"); +} + +double AbstractHistogramIterator::PercentileIteratedTo() const { + return (100.0 * static_cast<double>(total_count_to_current_index_)) / histogram_total_count_; +} + +double AbstractHistogramIterator::PercentileIteratedFrom() const { + return (100.0 * static_cast<double>(total_count_to_prev_index_)) / histogram_total_count_; +} + +uint64_t AbstractHistogramIterator::ValueIteratedTo() const { + return histogram_->HighestEquivalentValue(current_value_at_index_); +} + +bool AbstractHistogramIterator::ExhaustedSubBuckets() const { + return (current_bucket_index_ >= histogram_->bucket_count_); +} + +void AbstractHistogramIterator::IncrementSubBucket() { + fresh_sub_bucket_ = true; + // Take on the next index: + current_bucket_index_ = next_bucket_index_; + current_sub_bucket_index_ = next_sub_bucket_index_; + current_value_at_index_ = next_value_at_index_; + // Figure out the next next index: + next_sub_bucket_index_++; + if (next_sub_bucket_index_ >= histogram_->sub_bucket_count_) { + next_sub_bucket_index_ = histogram_->sub_bucket_half_count_; + next_bucket_index_++; + } + next_value_at_index_ = HdrHistogram::ValueFromIndex(next_bucket_index_, next_sub_bucket_index_); +} + +/////////////////////////////////////////////////////////////////////// +// RecordedValuesIterator +/////////////////////////////////////////////////////////////////////// + +RecordedValuesIterator::RecordedValuesIterator(const HdrHistogram* histogram) + : AbstractHistogramIterator(histogram), + visited_sub_bucket_index_(-1), + visited_bucket_index_(-1) { +} + +void RecordedValuesIterator::IncrementIterationLevel() { + visited_sub_bucket_index_ = current_sub_bucket_index_; + visited_bucket_index_ = current_bucket_index_; +} + +bool RecordedValuesIterator::ReachedIterationLevel() const { + uint64_t 
current_ij_count = + histogram_->CountAt(current_bucket_index_, current_sub_bucket_index_); + return current_ij_count != 0 && + ((visited_sub_bucket_index_ != current_sub_bucket_index_) || + (visited_bucket_index_ != current_bucket_index_)); +} + +/////////////////////////////////////////////////////////////////////// +// PercentileIterator +/////////////////////////////////////////////////////////////////////// + +PercentileIterator::PercentileIterator(const HdrHistogram* histogram, + int percentile_ticks_per_half_distance) + : AbstractHistogramIterator(histogram), + percentile_ticks_per_half_distance_(percentile_ticks_per_half_distance), + percentile_level_to_iterate_to_(0.0), + percentile_level_to_iterate_from_(0.0), + reached_last_recorded_value_(false) { +} + +bool PercentileIterator::HasNext() const { + if (AbstractHistogramIterator::HasNext()) { + return true; + } + // We want one additional last step to 100% + if (!reached_last_recorded_value_ && (histogram_total_count_ > 0)) { + const_cast<PercentileIterator*>(this)->percentile_level_to_iterate_to_ = 100.0; + const_cast<PercentileIterator*>(this)->reached_last_recorded_value_ = true; + return true; + } + return false; +} + +double PercentileIterator::PercentileIteratedTo() const { + return percentile_level_to_iterate_to_; +} + + +double PercentileIterator::PercentileIteratedFrom() const { + return percentile_level_to_iterate_from_; +} + +void PercentileIterator::IncrementIterationLevel() { + percentile_level_to_iterate_from_ = percentile_level_to_iterate_to_; + // TODO: Can this expression be simplified? 
+ uint64_t percentile_reporting_ticks = percentile_ticks_per_half_distance_ * + static_cast<uint64_t>(pow(2.0, + static_cast<int>(log(100.0 / (100.0 - (percentile_level_to_iterate_to_))) / log(2)) + 1)); + percentile_level_to_iterate_to_ += 100.0 / percentile_reporting_ticks; +} + +bool PercentileIterator::ReachedIterationLevel() const { + if (count_at_this_value_ == 0) return false; + double current_percentile = + (100.0 * static_cast<double>(total_count_to_current_index_)) / histogram_total_count_; + return (current_percentile >= percentile_level_to_iterate_to_); +} + +} // namespace kudu
http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hdr_histogram.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hdr_histogram.h b/be/src/kudu/util/hdr_histogram.h
new file mode 100644
index 0000000..14b5e95
--- /dev/null
+++ b/be/src/kudu/util/hdr_histogram.h
@@ -0,0 +1,351 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef KUDU_UTIL_HDRHISTOGRAM_H_
#define KUDU_UTIL_HDRHISTOGRAM_H_

// C++ (TR1) port of HdrHistogram.
//
// Portions of these classes were ported from Java to C++ from the sources
// available at https://github.com/HdrHistogram/HdrHistogram .
//
// The code in this repository was written by Gil Tene, Michael Barker,
// and Matt Warren, and released to the public domain, as explained at
// http://creativecommons.org/publicdomain/zero/1.0/
// ---------------------------------------------------------------------------
//
// A High Dynamic Range (HDR) Histogram
//
// HdrHistogram supports the recording and analyzing sampled data value counts
// across a configurable integer value range with configurable value precision
// within the range. Value precision is expressed as the number of significant
// digits in the value recording, and provides control over value quantization
// behavior across the value range and the subsequent value resolution at any
// given level.
//
// For example, a Histogram could be configured to track the counts of observed
// integer values between 0 and 3,600,000,000 while maintaining a value
// precision of 3 significant digits across that range. Value quantization
// within the range will thus be no larger than 1/1,000th (or 0.1%) of any
// value. This example Histogram could be used to track and analyze the counts
// of observed response times ranging between 1 microsecond and 1 hour in
// magnitude, while maintaining a value resolution of 1 microsecond up to 1
// millisecond, a resolution of 1 millisecond (or better) up to one second, and
// a resolution of 1 second (or better) up to 1,000 seconds. At its maximum
// tracked value (1 hour), it would still maintain a resolution of 3.6 seconds
// (or better).

#include <stdint.h>

#include "kudu/gutil/atomicops.h"
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/macros.h"
#include "kudu/gutil/port.h"

namespace kudu {

class Status;

// This implementation allows you to specify a range and accuracy (significant
// digits) to support in an instance of a histogram. The class takes care of
// the rest. At this time, only uint64_t values are supported.
//
// An HdrHistogram consists of a set of buckets, which bucket the magnitude of
// a value stored, and a set of sub-buckets, which implement the tunable
// precision of the storage. So if you specify 3 significant digits of
// precision, then you will get about 10^3 sub-buckets (as a power of 2) for
// each level of magnitude. Magnitude buckets are tracked in powers of 2.
//
// This class is thread-safe.
class HdrHistogram {
 public:
  // Specify the highest trackable value so that the class has a bound on the
  // number of buckets, and # of significant digits (in decimal) so that the
  // class can determine the granularity of those buckets.
  HdrHistogram(uint64_t highest_trackable_value, int num_significant_digits);

  // Copy-construct a (non-consistent) snapshot of other.
  explicit HdrHistogram(const HdrHistogram& other);

  // Validate your params before trying to construct the object.
  static bool IsValidHighestTrackableValue(uint64_t highest_trackable_value);
  static bool IsValidNumSignificantDigits(int num_significant_digits);

  // Record new data.
  void Increment(int64_t value);
  void IncrementBy(int64_t value, int64_t count);

  // Record new data, correcting for "coordinated omission".
  //
  // See https://groups.google.com/d/msg/mechanical-sympathy/icNZJejUHfE/BfDekfBEs_sJ
  // for more details.
  void IncrementWithExpectedInterval(int64_t value,
                                     int64_t expected_interval_between_samples);

  // Fetch configuration params.
  uint64_t highest_trackable_value() const { return highest_trackable_value_; }
  int num_significant_digits() const { return num_significant_digits_; }

  // Get indexes into histogram based on value.
  int BucketIndex(uint64_t value) const;
  int SubBucketIndex(uint64_t value, int bucket_index) const;

  // Count of all events recorded.
  uint64_t TotalCount() const { return base::subtle::NoBarrier_Load(&total_count_); }

  // Sum of all events recorded.
  uint64_t TotalSum() const { return base::subtle::NoBarrier_Load(&total_sum_); }

  // Return number of items at index.
  uint64_t CountAt(int bucket_index, int sub_bucket_index) const;

  // Return count of values in bucket with values equivalent to value.
  uint64_t CountInBucketForValue(uint64_t) const;

  // Return representative value based on index.
  static uint64_t ValueFromIndex(int bucket_index, int sub_bucket_index);

  // Get the size (in value units) of the range of values that are equivalent
  // to the given value within the histogram's resolution. Where "equivalent"
  // means that value samples recorded for any two equivalent values are
  // counted in a common total count.
  uint64_t SizeOfEquivalentValueRange(uint64_t value) const;

  // Get the lowest value that is equivalent to the given value within the
  // histogram's resolution. Where "equivalent" means that value samples
  // recorded for any two equivalent values are counted in a common total
  // count.
  uint64_t LowestEquivalentValue(uint64_t value) const;

  // Get the highest value that is equivalent to the given value within the
  // histogram's resolution.
  uint64_t HighestEquivalentValue(uint64_t value) const;

  // Get a value that lies in the middle (rounded up) of the range of values
  // equivalent the given value.
  uint64_t MedianEquivalentValue(uint64_t value) const;

  // Get the next value that is not equivalent to the given value within the
  // histogram's resolution.
  uint64_t NextNonEquivalentValue(uint64_t value) const;

  // Determine if two values are equivalent with the histogram's resolution.
  bool ValuesAreEquivalent(uint64_t value1, uint64_t value2) const;

  // Get the exact minimum value (may lie outside the histogram).
  uint64_t MinValue() const;

  // Get the exact maximum value (may lie outside the histogram).
  uint64_t MaxValue() const;

  // Get the exact mean value of all recorded values in the histogram.
  double MeanValue() const;

  // Get the value at a given percentile.
  // This is a percentile in percents, i.e. 99.99 percentile.
  uint64_t ValueAtPercentile(double percentile) const;

  // Get the percentile at a given value
  // TODO: implement
  // double PercentileAtOrBelowValue(uint64_t value) const;

  // Get the count of recorded values within a range of value levels.
  // (inclusive to within the histogram's resolution)
  // TODO: implement
  //uint64_t CountBetweenValues(uint64_t low_value, uint64_t high_value) const;

 private:
  friend class AbstractHistogramIterator;

  static const uint64_t kMinHighestTrackableValue = 2;
  static const int kMinValidNumSignificantDigits = 1;
  static const int kMaxValidNumSignificantDigits = 5;

  void Init();
  int CountsArrayIndex(int bucket_index, int sub_bucket_index) const;

  uint64_t highest_trackable_value_;
  int num_significant_digits_;
  int counts_array_length_;
  int bucket_count_;
  int sub_bucket_count_;

  // "Hot" fields in the write path.
  uint8_t sub_bucket_half_count_magnitude_;
  int sub_bucket_half_count_;
  uint32_t sub_bucket_mask_;

  // Also hot.
  base::subtle::Atomic64 total_count_;
  base::subtle::Atomic64 total_sum_;
  base::subtle::Atomic64 min_value_;
  base::subtle::Atomic64 max_value_;
  gscoped_array<base::subtle::Atomic64> counts_;

  HdrHistogram& operator=(const HdrHistogram& other); // Disable assignment operator.
};

// Value returned from iterators.
struct HistogramIterationValue {
  HistogramIterationValue()
    : value_iterated_to(0),
      value_iterated_from(0),
      count_at_value_iterated_to(0),
      count_added_in_this_iteration_step(0),
      total_count_to_this_value(0),
      total_value_to_this_value(0),
      percentile(0.0),
      percentile_level_iterated_to(0.0) {
  }

  void Reset() {
    value_iterated_to = 0;
    value_iterated_from = 0;
    count_at_value_iterated_to = 0;
    count_added_in_this_iteration_step = 0;
    total_count_to_this_value = 0;
    total_value_to_this_value = 0;
    percentile = 0.0;
    percentile_level_iterated_to = 0.0;
  }

  uint64_t value_iterated_to;
  uint64_t value_iterated_from;
  uint64_t count_at_value_iterated_to;
  uint64_t count_added_in_this_iteration_step;
  uint64_t total_count_to_this_value;
  uint64_t total_value_to_this_value;
  double percentile;
  double percentile_level_iterated_to;
};

// Base class for iterating through histogram values.
//
// The underlying histogram must not be modified or destroyed while this class
// is iterating over it.
//
// This class is not thread-safe.
class AbstractHistogramIterator {
 public:
  // Create iterator with new histogram.
  // The histogram must not be mutated while the iterator is in use.
  explicit AbstractHistogramIterator(const HdrHistogram* histogram);
  virtual ~AbstractHistogramIterator() {
  }

  // Returns true if the iteration has more elements.
  virtual bool HasNext() const;

  // Returns the next element in the iteration.
  Status Next(HistogramIterationValue* value);

  virtual double PercentileIteratedTo() const;
  virtual double PercentileIteratedFrom() const;
  uint64_t ValueIteratedTo() const;

 protected:
  // Implementations must override these methods.
  virtual void IncrementIterationLevel() = 0;
  virtual bool ReachedIterationLevel() const = 0;

  const HdrHistogram* histogram_;
  HistogramIterationValue cur_iter_val_;

  uint64_t histogram_total_count_;

  int current_bucket_index_;
  int current_sub_bucket_index_;
  uint64_t current_value_at_index_;

  int next_bucket_index_;
  int next_sub_bucket_index_;
  uint64_t next_value_at_index_;

  uint64_t prev_value_iterated_to_;
  uint64_t total_count_to_prev_index_;

  uint64_t total_count_to_current_index_;
  uint64_t total_value_to_current_index_;

  uint64_t count_at_this_value_;

 private:
  bool ExhaustedSubBuckets() const;
  void IncrementSubBucket();

  bool fresh_sub_bucket_;

  DISALLOW_COPY_AND_ASSIGN(AbstractHistogramIterator);
};

// Used for iterating through all recorded histogram values using the finest
// granularity steps supported by the underlying representation. The iteration
// steps through all non-zero recorded value counts, and terminates when all
// recorded histogram values are exhausted.
//
// The underlying histogram must not be modified or destroyed while this class
// is iterating over it.
//
// This class is not thread-safe.
class RecordedValuesIterator : public AbstractHistogramIterator {
 public:
  explicit RecordedValuesIterator(const HdrHistogram* histogram);

 protected:
  virtual void IncrementIterationLevel() OVERRIDE;
  virtual bool ReachedIterationLevel() const OVERRIDE;

 private:
  int visited_sub_bucket_index_;
  int visited_bucket_index_;

  DISALLOW_COPY_AND_ASSIGN(RecordedValuesIterator);
};

// Used for iterating through histogram values according to percentile levels.
// The iteration is performed in steps that start at 0% and reduce their
// distance to 100% according to the percentileTicksPerHalfDistance parameter,
// ultimately reaching 100% when all recorded histogram values are exhausted.
//
// The underlying histogram must not be modified or destroyed while this class
// is iterating over it.
//
// This class is not thread-safe.
class PercentileIterator : public AbstractHistogramIterator {
 public:
  // TODO: Explain percentile_ticks_per_half_distance.
  PercentileIterator(const HdrHistogram* histogram,
                     int percentile_ticks_per_half_distance);
  virtual bool HasNext() const OVERRIDE;
  virtual double PercentileIteratedTo() const OVERRIDE;
  virtual double PercentileIteratedFrom() const OVERRIDE;

 protected:
  virtual void IncrementIterationLevel() OVERRIDE;
  virtual bool ReachedIterationLevel() const OVERRIDE;

 private:
  int percentile_ticks_per_half_distance_;
  double percentile_level_to_iterate_to_;
  double percentile_level_to_iterate_from_;
  bool reached_last_recorded_value_;

  DISALLOW_COPY_AND_ASSIGN(PercentileIterator);
};

} // namespace kudu

#endif // KUDU_UTIL_HDRHISTOGRAM_H_

http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hexdump.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/hexdump.cc b/be/src/kudu/util/hexdump.cc
new file mode 100644
index 0000000..ddecd9c
--- /dev/null
+++ b/be/src/kudu/util/hexdump.cc
@@ -0,0 +1,85 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/util/hexdump.h" + +#include <algorithm> +#include <cctype> +#include <cstdint> +#include <string> + +#include <glog/logging.h> + +#include "kudu/gutil/stringprintf.h" +#include "kudu/util/logging.h" +#include "kudu/util/slice.h" + +namespace kudu { + +std::string HexDump(const Slice &slice) { + if (KUDU_SHOULD_REDACT()) { + return kRedactionMessage; + } + + std::string output; + output.reserve(slice.size() * 5); + + const uint8_t *p = slice.data(); + + int rem = slice.size(); + while (rem > 0) { + const uint8_t *line_p = p; + int line_len = std::min(rem, 16); + int line_rem = line_len; + StringAppendF(&output, "%06lx: ", line_p - slice.data()); + + while (line_rem >= 2) { + StringAppendF(&output, "%02x%02x ", + p[0] & 0xff, p[1] & 0xff); + p += 2; + line_rem -= 2; + } + + if (line_rem == 1) { + StringAppendF(&output, "%02x ", + p[0] & 0xff); + p += 1; + line_rem -= 1; + } + DCHECK_EQ(line_rem, 0); + + int padding = (16 - line_len) / 2; + + for (int i = 0; i < padding; i++) { + output.append(" "); + } + + for (int i = 0; i < line_len; i++) { + char c = line_p[i]; + if (isprint(c)) { + output.push_back(c); + } else { + output.push_back('.'); + } + } + + output.push_back('\n'); + rem -= line_len; + } + return output; +} +} // namespace kudu http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/hexdump.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/hexdump.h b/be/src/kudu/util/hexdump.h new file mode 100644 index 0000000..eacfad2 --- /dev/null +++ b/be/src/kudu/util/hexdump.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_HEXDUMP_H +#define KUDU_UTIL_HEXDUMP_H + +#include <string> + +namespace kudu { + +class Slice; + +// Generate an 'xxd'-style hexdump of the given slice. This should only be used +// for debugging, as the format is subject to change and it has not been +// implemented for speed. +// +// The returned string will be redacted if redaction is enabled. +std::string HexDump(const Slice &slice); + +} // namespace kudu +#endif http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/high_water_mark.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/high_water_mark.h b/be/src/kudu/util/high_water_mark.h new file mode 100644 index 0000000..dfc30e4 --- /dev/null +++ b/be/src/kudu/util/high_water_mark.h @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_HIGH_WATER_MARK_H +#define KUDU_UTIL_HIGH_WATER_MARK_H + +#include "kudu/gutil/macros.h" +#include "kudu/util/atomic.h" + +namespace kudu { + +// Lock-free integer that keeps track of the highest value seen. +// Similar to Impala's RuntimeProfile::HighWaterMarkCounter. +// HighWaterMark::max_value() returns the highest value seen; +// HighWaterMark::current_value() returns the current value. +class HighWaterMark { + public: + explicit HighWaterMark(int64_t initial_value) + : current_value_(initial_value), + max_value_(initial_value) { + } + + // Return the current value. + int64_t current_value() const { + return current_value_.Load(kMemOrderNoBarrier); + } + + // Return the max value. + int64_t max_value() const { + return max_value_.Load(kMemOrderNoBarrier); + } + + // If current value + 'delta' is <= 'max', increment current value + // by 'delta' and return true; return false otherwise. 
+ bool TryIncrementBy(int64_t delta, int64_t max) { + while (true) { + int64_t old_val = current_value(); + int64_t new_val = old_val + delta; + if (new_val > max) { + return false; + } + if (PREDICT_TRUE(current_value_.CompareAndSet(old_val, + new_val, + kMemOrderNoBarrier))) { + UpdateMax(new_val); + return true; + } + } + } + + void IncrementBy(int64_t amount) { + UpdateMax(current_value_.IncrementBy(amount, kMemOrderNoBarrier)); + } + + void set_value(int64_t v) { + current_value_.Store(v, kMemOrderNoBarrier); + UpdateMax(v); + } + + private: + void UpdateMax(int64_t value) { + max_value_.StoreMax(value, kMemOrderNoBarrier); + } + + AtomicInt<int64_t> current_value_; + AtomicInt<int64_t> max_value_; +}; + +} // namespace kudu +#endif /* KUDU_UTIL_HIGH_WATER_MARK_H */ + + http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/histogram.proto ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/histogram.proto b/be/src/kudu/util/histogram.proto new file mode 100644 index 0000000..e4526e7 --- /dev/null +++ b/be/src/kudu/util/histogram.proto @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+syntax = "proto2"; +package kudu; + +option java_package = "org.apache.kudu"; + +// Captures the state of an Histogram. +message HistogramSnapshotPB { + required string type = 1; + required string name = 2; + optional string description = 3; + required string unit = 4; + optional string label = 19; + + required uint64 max_trackable_value = 5; + required int32 num_significant_digits = 6; + required uint64 total_count = 7; + optional uint64 total_sum = 18; + required uint64 min = 8; + required double mean = 9; + required uint64 percentile_75 = 10; + required uint64 percentile_95 = 11; + required uint64 percentile_99 = 12; + required uint64 percentile_99_9 = 13; + required uint64 percentile_99_99 = 14; + required uint64 max = 15; + repeated uint64 values = 16 [packed = true]; + repeated uint64 counts = 17 [packed = true]; +} + +message HistogramSnapshotsListPB { + repeated HistogramSnapshotPB histograms = 1; +} http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/init.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/init.cc b/be/src/kudu/util/init.cc new file mode 100644 index 0000000..bd97d79 --- /dev/null +++ b/be/src/kudu/util/init.cc @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/util/init.h" + +#include <fcntl.h> +#include <unistd.h> + +#include <cstdlib> +#include <string> + +#include <glog/logging.h> + +#include "kudu/gutil/cpu.h" +#include "kudu/gutil/strings/substitute.h" +#include "kudu/util/status.h" + +using std::string; + +namespace kudu { + +Status BadCPUStatus(const base::CPU& cpu, const char* instruction_set) { + return Status::NotSupported(strings::Substitute( + "The CPU on this system ($0) does not support the $1 instruction " + "set which is required for running Kudu. If you are running inside a VM, " + "you may need to enable SSE4.2 pass-through.", + cpu.cpu_brand(), instruction_set)); +} + +bool IsFdOpen(int fd) { + return fcntl(fd, F_GETFL) != -1; +} + +// Checks that the standard file descriptors are open when the process +// starts. +// +// If these descriptors aren't open, we can run into serious issues: +// we later might open some other files which end up reusing the same +// file descriptor numbers as stderr, and then some library like glog +// may decide to write a log message to what it thinks is stderr. That +// would then overwrite one of our important data files and cause +// corruption! +void CheckStandardFds() { + if (!IsFdOpen(STDIN_FILENO) || + !IsFdOpen(STDOUT_FILENO) || + !IsFdOpen(STDERR_FILENO)) { + // We can't use LOG(FATAL) here because glog isn't initialized yet, and even if it + // were, it would try to write to stderr, which might end up writing the log message + // into some unexpected place. This is a rare enough issue that people can deal with + // the core dump. 
+ abort(); + } +} + +Status CheckCPUFlags() { + base::CPU cpu; + if (!cpu.has_sse42()) { + return BadCPUStatus(cpu, "SSE4.2"); + } + + if (!cpu.has_ssse3()) { + return BadCPUStatus(cpu, "SSSE3"); + } + + return Status::OK(); +} + +void InitKuduOrDie() { + CheckStandardFds(); + CHECK_OK(CheckCPUFlags()); + // NOTE: this function is called before flags are parsed. + // Do not add anything in here which is flag-dependent. +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/init.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/init.h b/be/src/kudu/util/init.h new file mode 100644 index 0000000..84e36e1 --- /dev/null +++ b/be/src/kudu/util/init.h @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_INIT_H +#define KUDU_UTIL_INIT_H + +#include "kudu/util/status.h" + +namespace kudu { + +// Return a NotSupported Status if the current CPU does not support the CPU flags +// required for Kudu. +Status CheckCPUFlags(); + +// Initialize Kudu, checking that the platform we are running on is supported, etc. +// Issues a FATAL log message if we fail to init. 
+void InitKuduOrDie(); + +} // namespace kudu +#endif /* KUDU_UTIL_INIT_H */ http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/inline_slice-test.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/inline_slice-test.cc b/be/src/kudu/util/inline_slice-test.cc new file mode 100644 index 0000000..60a0005 --- /dev/null +++ b/be/src/kudu/util/inline_slice-test.cc @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <cstddef> +#include <cstdint> +#include <string> + +#include <gtest/gtest.h> + +#include "kudu/gutil/gscoped_ptr.h" +#include "kudu/util/inline_slice.h" +#include "kudu/util/memory/arena.h" +#include "kudu/util/slice.h" + +namespace kudu { + +template<size_t N> +static void TestRoundTrip(InlineSlice<N> *slice, + Arena *arena, + size_t test_size) { + gscoped_ptr<uint8_t[]> buf(new uint8_t[test_size]); + for (int i = 0; i < test_size; i++) { + buf[i] = i & 0xff; + } + + Slice test_input(buf.get(), test_size); + + slice->set(test_input, arena); + Slice ret = slice->as_slice(); + ASSERT_TRUE(ret == test_input) + << "test_size =" << test_size << "\n" + << "ret = " << ret.ToDebugString() << "\n" + << "test_input = " << test_input.ToDebugString(); + + // If the data is small enough to fit inline, then + // the returned slice should point directly into the + // InlineSlice object. + if (test_size < N) { + ASSERT_EQ(reinterpret_cast<const uint8_t *>(slice) + 1, + ret.data()); + } +} + +// Sweep a variety of inputs for a given size of inline +// data +template<size_t N> +static void DoTest() { + Arena arena(1024); + + // Test a range of inputs both growing and shrinking + InlineSlice<N> my_slice; + ASSERT_EQ(N, sizeof(my_slice)); + + for (size_t to_test = 0; to_test < 1000; to_test++) { + TestRoundTrip(&my_slice, &arena, to_test); + } + for (size_t to_test = 1000; to_test > 0; to_test--) { + TestRoundTrip(&my_slice, &arena, to_test); + } +} + +TEST(TestInlineSlice, Test8ByteInline) { + DoTest<8>(); +} + +TEST(TestInlineSlice, Test12ByteInline) { + DoTest<12>(); +} + +TEST(TestInlineSlice, Test16ByteInline) { + DoTest<16>(); +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/inline_slice.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/inline_slice.h b/be/src/kudu/util/inline_slice.h new file mode 100644 index 0000000..248f5b1 --- /dev/null +++ 
b/be/src/kudu/util/inline_slice.h @@ -0,0 +1,181 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_INLINE_SLICE_H +#define KUDU_UTIL_INLINE_SLICE_H + +#include "kudu/gutil/atomicops.h" +#include "kudu/gutil/casts.h" +#include "kudu/util/memory/arena.h" + +namespace kudu { + +#if __BYTE_ORDER != __LITTLE_ENDIAN +#error This needs to be ported for big endian +#endif + +// Class which represents short strings inline, and stores longer ones +// by instead storing a pointer. +// +// Internal format: +// The buffer must be at least as large as a pointer (eg 8 bytes for 64-bit). +// Let ptr = bit-casting the first 8 bytes as a pointer: +// If buf_[0] < 0xff: +// buf_[0] == length of stored data +// buf_[1..1 + buf_[0]] == inline data +// If buf_[0] == 0xff: +// buf_[1..sizeof(uint8_t *)] == pointer to indirect data, minus the MSB. +// buf_[sizeof(uint8_t *)..] = unused +// TODO: we could store a prefix of the indirect data in this unused space +// in the future, which might be able to short-circuit some comparisons +// +// The indirect data which is pointed to is stored as a 4 byte length followed by +// the actual data. 
+// +// This class relies on the fact that the most significant bit of any x86 pointer is +// 0 (i.e pointers only use the bottom 48 bits) +// +// If ATOMIC is true, then this class has the semantics that readers will never see +// invalid pointers, even in the case of concurrent access. However, they _may_ see +// invalid *data*. That is to say, calling 'as_slice()' will always return a slice +// which points to a valid memory region -- the memory region may contain garbage +// but will not cause a segfault on access. +// +// These ATOMIC semantics may seem too loose to be useful, but can be used in +// optimistic concurrency control schemes -- so long as accessing the slice doesn't +// produce a segfault, it's OK to read bad data on a race because the higher-level +// concurrency control will cause a retry. +template<size_t STORAGE_SIZE, bool ATOMIC = false> +class InlineSlice { + private: + enum { + kPointerByteWidth = sizeof(uintptr_t), + kPointerBitWidth = kPointerByteWidth * 8, + kMaxInlineData = STORAGE_SIZE - 1 + }; + + static_assert(STORAGE_SIZE >= kPointerByteWidth, + "InlineSlice storage size must be greater than the width of a pointer"); + static_assert(STORAGE_SIZE <= 256, + "InlineSlice storage size must be less than 256 bytes"); + public: + InlineSlice() { + } + + inline const Slice as_slice() const ATTRIBUTE_ALWAYS_INLINE { + DiscriminatedPointer dptr = LoadValue(); + + if (dptr.is_indirect()) { + const uint8_t *indir_data = reinterpret_cast<const uint8_t *>(dptr.pointer); + uint32_t len = *reinterpret_cast<const uint32_t *>(indir_data); + indir_data += sizeof(uint32_t); + return Slice(indir_data, static_cast<size_t>(len)); + } + uint8_t len = dptr.discriminator; + DCHECK_LE(len, STORAGE_SIZE - 1); + return Slice(&buf_[1], len); + } + + template<class ArenaType> + void set(const Slice &src, ArenaType *alloc_arena) { + set(src.data(), src.size(), alloc_arena); + } + + template<class ArenaType> + void set(const uint8_t *src, size_t len, + ArenaType 
*alloc_arena) { + if (len <= kMaxInlineData) { + if (ATOMIC) { + // If atomic, we need to make sure that we store the discriminator + // before we copy in any data. Otherwise the data would overwrite + // part of a pointer and a reader might see an invalid address. + DiscriminatedPointer dptr; + dptr.discriminator = len; + dptr.pointer = 0; // will be overwritten + // "Acquire" ensures that the later memcpy doesn't reorder above the + // set of the discriminator bit. + base::subtle::Acquire_Store(reinterpret_cast<volatile AtomicWord *>(buf_), + bit_cast<uintptr_t>(dptr)); + } else { + buf_[0] = len; + } + memcpy(&buf_[1], src, len); + + } else { + // TODO: if already indirect and the current storage has enough space, just reuse that. + + // Set up the pointed-to data before setting a pointer to it. This ensures that readers + // never see a pointer to an invalid region (i.e one without a proper length header). + void *in_arena = CHECK_NOTNULL(alloc_arena->AllocateBytes(len + sizeof(uint32_t))); + *reinterpret_cast<uint32_t *>(in_arena) = len; + memcpy(reinterpret_cast<uint8_t *>(in_arena) + sizeof(uint32_t), src, len); + set_ptr(in_arena); + } + } + + private: + struct DiscriminatedPointer { + uint8_t discriminator : 8; + uintptr_t pointer : 54; + + bool is_indirect() const { + return discriminator == 0xff; + } + }; + + DiscriminatedPointer LoadValue() const { + if (ATOMIC) { + // Load with "Acquire" semantics -- if we load a pointer, this ensures + // that we also see the pointed-to data. + uintptr_t ptr_val = base::subtle::Acquire_Load( + reinterpret_cast<volatile const AtomicWord *>(buf_)); + return bit_cast<DiscriminatedPointer>(ptr_val); + } else { + DiscriminatedPointer ret; + memcpy(&ret, buf_, sizeof(ret)); + return ret; + } + } + + // Set the internal storage to be an indirect pointer to the given + // address. 
+ void set_ptr(void *ptr) { + uintptr_t ptr_int = reinterpret_cast<uintptr_t>(ptr); + DCHECK_EQ(ptr_int >> (kPointerBitWidth - 8), 0) << + "bad pointer (should have 0x00 MSB): " << ptr; + + DiscriminatedPointer dptr; + dptr.discriminator = 0xff; + dptr.pointer = ptr_int; + + if (ATOMIC) { + // Store with "Release" semantics -- this ensures that the pointed-to data + // is visible to any readers who see this pointer. + uintptr_t to_store = bit_cast<uintptr_t>(dptr); + base::subtle::Release_Store(reinterpret_cast<volatile AtomicWord *>(buf_), + to_store); + } else { + memcpy(&buf_[0], &dptr, sizeof(dptr)); + } + } + + uint8_t buf_[STORAGE_SIZE]; + +} PACKED; + +} // namespace kudu + +#endif http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/int128-test.cc ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/int128-test.cc b/be/src/kudu/util/int128-test.cc new file mode 100644 index 0000000..cc1e174 --- /dev/null +++ b/be/src/kudu/util/int128-test.cc @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <cstddef> +#include <cstdint> +#include <iosfwd> +#include <string> + +#include <gtest/gtest.h> + +#include "kudu/gutil/macros.h" +#include "kudu/util/int128.h" +#include "kudu/util/int128_util.h" + +using std::string; + +namespace kudu { + +TEST(TestInt128, TestOstreamSigned) { + int128_t INTEGERS[] = {0, -1, 1, -1234567890, + INT64_MIN, UINT64_MAX, + INT128_MIN, + INT128_MAX}; + std::string STRINGS[] = {"0", "-1", "1", "-1234567890", + "-9223372036854775808", "18446744073709551615", + "-170141183460469231731687303715884105728", + "170141183460469231731687303715884105727"}; + for (size_t i = 0; i < arraysize(INTEGERS); i++) { + std::ostringstream ss; + ss << INTEGERS[i]; + ASSERT_EQ(STRINGS[i], ss.str()); + } +} + +TEST(TestInt128, TestOstreamUnsigned) { + uint128_t INTEGERS[] = {0, 1, 1234567890, + UINT128_MIN, UINT128_MAX}; + string STRINGS[] = {"0", "1", "1234567890", + "0", "340282366920938463463374607431768211455"}; + for (size_t i = 0; i < arraysize(INTEGERS); i++) { + std::ostringstream ss; + ss << INTEGERS[i]; + ASSERT_EQ(STRINGS[i], ss.str()); + } +} + +TEST(TestInt128, TestCasting) { + uint128_t mathToMax = (static_cast<uint128_t>(INT128_MAX) * 2) + 1; + ASSERT_EQ(UINT128_MAX, mathToMax); + + uint128_t castToMax = static_cast<uint128_t>(-1); + ASSERT_EQ(UINT128_MAX, castToMax); +} + +} // namespace kudu http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/int128.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/int128.h b/be/src/kudu/util/int128.h new file mode 100644 index 0000000..ac35d08 --- /dev/null +++ b/be/src/kudu/util/int128.h @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This file is the central location for defining the int128 type +// used by Kudu. Though this file is small it ensures flexibility +// as choices and standards around int128 change. +#pragma once + +// __int128 is not supported before gcc 4.6 +#if defined(__clang__) || \ + (defined(__GNUC__) && \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40600) +#define KUDU_INT128_SUPPORTED 1 +#else +#define KUDU_INT128_SUPPORTED 0 +#endif + +#if KUDU_INT128_SUPPORTED +namespace kudu { + +typedef unsigned __int128 uint128_t; +typedef signed __int128 int128_t; + +// Note: We don't use numeric_limits because it can give incorrect +// values for __int128 and unsigned __int128. +static const uint128_t UINT128_MIN = (uint128_t) 0; +static const uint128_t UINT128_MAX = ((uint128_t) -1); +static const int128_t INT128_MAX = ((int128_t)(UINT128_MAX >> 1)); +static const int128_t INT128_MIN = (-INT128_MAX - 1); + +} // namespace kudu +#endif http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/int128_util.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/int128_util.h b/be/src/kudu/util/int128_util.h new file mode 100644 index 0000000..2d01de7 --- /dev/null +++ b/be/src/kudu/util/int128_util.h @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/util/int128.h" + +#include <iostream> +#include <string> + +#include "kudu/gutil/strings/numbers.h" + +namespace std { + +// Support the << operator on int128_t and uint128_t types. +// +inline std::ostream& operator<<(std::ostream& os, const __int128& val) { + os << SimpleItoa(val); + return os; +} +inline std::ostream& operator<<(std::ostream& os, const unsigned __int128& val) { + os << SimpleItoa(val); + return os; +} + +} // namespace std + http://git-wip-us.apache.org/repos/asf/impala/blob/fcf190c4/be/src/kudu/util/interval_tree-inl.h ---------------------------------------------------------------------- diff --git a/be/src/kudu/util/interval_tree-inl.h b/be/src/kudu/util/interval_tree-inl.h new file mode 100644 index 0000000..7637317 --- /dev/null +++ b/be/src/kudu/util/interval_tree-inl.h @@ -0,0 +1,444 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#ifndef KUDU_UTIL_INTERVAL_TREE_INL_H +#define KUDU_UTIL_INTERVAL_TREE_INL_H + +#include <algorithm> +#include <vector> + +#include "kudu/util/interval_tree.h" + +namespace kudu { + +template<class Traits> +IntervalTree<Traits>::IntervalTree(const IntervalVector &intervals) + : root_(NULL) { + if (!intervals.empty()) { + root_ = CreateNode(intervals); + } +} + +template<class Traits> +IntervalTree<Traits>::~IntervalTree() { + delete root_; +} + +template<class Traits> +template<class QueryPointType> +void IntervalTree<Traits>::FindContainingPoint(const QueryPointType &query, + IntervalVector *results) const { + if (root_) { + root_->FindContainingPoint(query, results); + } +} + +template<class Traits> +template<class Callback, class QueryContainer> +void IntervalTree<Traits>::ForEachIntervalContainingPoints( + const QueryContainer& queries, + const Callback& cb) const { + if (root_) { + root_->ForEachIntervalContainingPoints(queries.begin(), queries.end(), cb); + } +} + + +template<class Traits> +void IntervalTree<Traits>::FindIntersectingInterval(const interval_type &query, + IntervalVector *results) const { + if (root_) { + root_->FindIntersectingInterval(query, results); + } +} + +template<class Traits> +static bool LessThan(const typename Traits::point_type &a, + const typename Traits::point_type &b) { + return Traits::compare(a, b) < 0; +} + +// Select a split point which attempts to evenly divide 'in' into three groups: +// (a) those that are fully left of the split point +// (b) those that overlap the split point. 
+// (c) those that are fully right of the split point +// These three groups are stored in the output parameters '*left', '*overlapping', +// and '*right', respectively. The selected split point is stored in *split_point. +// +// For example, the input interval set: +// +// |------1-------| |-----2-----| +// |--3--| |---4--| |----5----| +// | +// Resulting split: | Partition point +// | +// +// *left: intervals 1 and 3 +// *overlapping: interval 4 +// *right: intervals 2 and 5 +template<class Traits> +void IntervalTree<Traits>::Partition(const IntervalVector &in, + point_type *split_point, + IntervalVector *left, + IntervalVector *overlapping, + IntervalVector *right) { + CHECK(!in.empty()); + + // Pick a split point which is the median of all of the interval boundaries. + std::vector<point_type> endpoints; + endpoints.reserve(in.size() * 2); + for (const interval_type &interval : in) { + endpoints.push_back(Traits::get_left(interval)); + endpoints.push_back(Traits::get_right(interval)); + } + std::sort(endpoints.begin(), endpoints.end(), LessThan<Traits>); + *split_point = endpoints[endpoints.size() / 2]; + + // Partition into the groups based on the determined split point. 
+ for (const interval_type &interval : in) { + if (Traits::compare(Traits::get_right(interval), *split_point) < 0) { + // | split point + // |------------| | + // interval + left->push_back(interval); + } else if (Traits::compare(Traits::get_left(interval), *split_point) > 0) { + // | split point + // | |------------| + // interval + right->push_back(interval); + } else { + // | split point + // | + // |------------| + // interval + overlapping->push_back(interval); + } + } +} + +template<class Traits> +typename IntervalTree<Traits>::node_type *IntervalTree<Traits>::CreateNode( + const IntervalVector &intervals) { + IntervalVector left, right, overlap; + point_type split_point; + + // First partition the input intervals and select a split point + Partition(intervals, &split_point, &left, &overlap, &right); + + // Recursively subdivide the intervals which are fully left or fully + // right of the split point into subtree nodes. + node_type *left_node = !left.empty() ? CreateNode(left) : NULL; + node_type *right_node = !right.empty() ? CreateNode(right) : NULL; + + return new node_type(split_point, left_node, overlap, right_node); +} + +namespace interval_tree_internal { + +// Node in the interval tree. +template<typename Traits> +class ITNode { + private: + // Import types. + typedef std::vector<typename Traits::interval_type> IntervalVector; + typedef typename Traits::interval_type interval_type; + typedef typename Traits::point_type point_type; + + public: + ITNode(point_type split_point, + ITNode<Traits> *left, + const IntervalVector &overlap, + ITNode<Traits> *right); + ~ITNode(); + + // See IntervalTree::FindContainingPoint(...) + template<class QueryPointType> + void FindContainingPoint(const QueryPointType &query, + IntervalVector *results) const; + + // See IntervalTree::ForEachIntervalContainingPoints(). + // We use iterators here since as recursion progresses down the tree, we + // process sub-sequences of the original set of query points. 
+ template<class Callback, class ItType> + void ForEachIntervalContainingPoints(ItType begin_queries, + ItType end_queries, + const Callback& cb) const; + + // See IntervalTree::FindIntersectingInterval(...) + void FindIntersectingInterval(const interval_type &query, + IntervalVector *results) const; + + private: + // Comparators for sorting lists of intervals. + static bool SortByAscLeft(const interval_type &a, const interval_type &b); + static bool SortByDescRight(const interval_type &a, const interval_type &b); + + // Partition point of this node. + point_type split_point_; + + // Those nodes that overlap with split_point_, in ascending order by their left side. + IntervalVector overlapping_by_asc_left_; + + // Those nodes that overlap with split_point_, in descending order by their right side. + IntervalVector overlapping_by_desc_right_; + + // Tree node for intervals fully left of split_point_, or NULL. + ITNode *left_; + + // Tree node for intervals fully right of split_point_, or NULL. 
+ ITNode *right_; + + DISALLOW_COPY_AND_ASSIGN(ITNode); +}; + +template<class Traits> +bool ITNode<Traits>::SortByAscLeft(const interval_type &a, const interval_type &b) { + return Traits::compare(Traits::get_left(a), Traits::get_left(b)) < 0; +} + +template<class Traits> +bool ITNode<Traits>::SortByDescRight(const interval_type &a, const interval_type &b) { + return Traits::compare(Traits::get_right(a), Traits::get_right(b)) > 0; +} + +template <class Traits> +ITNode<Traits>::ITNode(typename Traits::point_type split_point, + ITNode<Traits> *left, const IntervalVector &overlap, + ITNode<Traits> *right) + : split_point_(std::move(split_point)), left_(left), right_(right) { + // Store two copies of the set of intervals which overlap the split point: + // 1) Sorted by ascending left boundary + overlapping_by_asc_left_.assign(overlap.begin(), overlap.end()); + std::sort(overlapping_by_asc_left_.begin(), overlapping_by_asc_left_.end(), SortByAscLeft); + // 2) Sorted by descending right boundary + overlapping_by_desc_right_.assign(overlap.begin(), overlap.end()); + std::sort(overlapping_by_desc_right_.begin(), overlapping_by_desc_right_.end(), SortByDescRight); +} + +template<class Traits> +ITNode<Traits>::~ITNode() { + if (left_) delete left_; + if (right_) delete right_; +} + +template<class Traits> +template<class Callback, class ItType> +void ITNode<Traits>::ForEachIntervalContainingPoints(ItType begin_queries, + ItType end_queries, + const Callback& cb) const { + if (begin_queries == end_queries) return; + + typedef decltype(*begin_queries) QueryPointType; + const auto& partitioner = [&](const QueryPointType& query_point) { + return Traits::compare(query_point, split_point_) < 0; + }; + + // Partition the query points into those less than the split_point_ and those greater + // than or equal to the split_point_. Because the input queries are already sorted, we + // can use 'std::partition_point' instead of 'std::partition'. 
+ // + // The resulting 'partition_point' is the first query point in the second group. + // + // Complexity: O(log(number of query points)) + DCHECK(std::is_partitioned(begin_queries, end_queries, partitioner)); + auto partition_point = std::partition_point(begin_queries, end_queries, partitioner); + + // Recurse left: any query points left of the split point may intersect + // with non-overlapping intervals fully-left of our split point. + if (left_ != NULL) { + left_->ForEachIntervalContainingPoints(begin_queries, partition_point, cb); + } + + // Handle the query points < split_point + // + // split_point_ + // | + // [------] \ + // [-------] | overlapping_by_asc_left_ + // [--------] / + // Q Q Q + // ^ ^ \___ not handled (right of split_point_) + // | | + // \___\___ these points will be handled here + // + + // Lower bound of query points still relevant. + auto rem_queries = begin_queries; + for (const interval_type &interval : overlapping_by_asc_left_) { + const auto& interval_left = Traits::get_left(interval); + // Find those query points which are right of the left side of the interval. + // 'first_match' here is the first query point >= interval_left. + // Complexity: O(log(num_queries)) + // + // TODO(todd): The non-batched implementation is O(log(num_intervals) * num_queries) + // whereas this loop ends up O(num_intervals * log(num_queries)). So, for + // small numbers of queries this is not the fastest way to structure these loops. + auto first_match = std::partition_point( + rem_queries, partition_point, + [&](const QueryPointType& query_point) { + return Traits::compare(query_point, interval_left) < 0; + }); + for (auto it = first_match; it != partition_point; ++it) { + cb(*it, interval); + } + // Since the intervals are sorted in ascending-left order, we can start + // the search for the next interval at the first match in this interval. + // (any query point which was left of the current interval will also be left + // of all future intervals). 
+ rem_queries = std::move(first_match); + } + + // Handle the query points >= split_point + // + // split_point_ + // | + // [--------] \ + // [-------] | overlapping_by_desc_right_ + // [------] / + // Q Q Q + // | \______\___ these points will be handled here + // | + // \___ not handled (left of split_point_) + + // Upper bound of query points still relevant. + rem_queries = end_queries; + for (const interval_type &interval : overlapping_by_desc_right_) { + const auto& interval_right = Traits::get_right(interval); + // Find the first query point which is > the right side of the interval. + auto first_non_match = std::partition_point( + partition_point, rem_queries, + [&](const QueryPointType& query_point) { + return Traits::compare(query_point, interval_right) <= 0; + }); + for (auto it = partition_point; it != first_non_match; ++it) { + cb(*it, interval); + } + // Same logic as above: if a query point was fully right of 'interval', + // then it will be fully right of all following intervals because they are + // sorted by descending-right. + rem_queries = std::move(first_non_match); + } + + if (right_ != NULL) { + while (partition_point != end_queries && + Traits::compare(*partition_point, split_point_) == 0) { + ++partition_point; + } + right_->ForEachIntervalContainingPoints(partition_point, end_queries, cb); + } +} + +template<class Traits> +template<class QueryPointType> +void ITNode<Traits>::FindContainingPoint(const QueryPointType &query, + IntervalVector *results) const { + int cmp = Traits::compare(query, split_point_); + if (cmp < 0) { + // None of the intervals in right_ may intersect this. + if (left_ != NULL) { + left_->FindContainingPoint(query, results); + } + + // Any intervals which start before the query point and overlap the split point + // must therefore contain the query point. 
+ auto p = std::partition_point( + overlapping_by_asc_left_.cbegin(), overlapping_by_asc_left_.cend(), + [&](const interval_type& interval) { + return Traits::compare(Traits::get_left(interval), query) <= 0; + }); + results->insert(results->end(), overlapping_by_asc_left_.cbegin(), p); + } else if (cmp > 0) { + // None of the intervals in left_ may intersect this. + if (right_ != NULL) { + right_->FindContainingPoint(query, results); + } + + // Any intervals which end after the query point and overlap the split point + // must therefore contain the query point. + auto p = std::partition_point( + overlapping_by_desc_right_.cbegin(), overlapping_by_desc_right_.cend(), + [&](const interval_type& interval) { + return Traits::compare(Traits::get_right(interval), query) >= 0; + }); + results->insert(results->end(), overlapping_by_desc_right_.cbegin(), p); + } else { + DCHECK_EQ(cmp, 0); + // The query is exactly our split point -- in this case we've already got + // the computed list of overlapping intervals. + results->insert(results->end(), overlapping_by_asc_left_.begin(), + overlapping_by_asc_left_.end()); + } +} + +template<class Traits> +void ITNode<Traits>::FindIntersectingInterval(const interval_type &query, + IntervalVector *results) const { + if (Traits::compare(Traits::get_right(query), split_point_) < 0) { + // The interval is fully left of the split point. So, it may not overlap + // with any in 'right_' + if (left_ != NULL) { + left_->FindIntersectingInterval(query, results); + } + + // Any intervals whose left edge is <= the query interval's right edge + // intersect the query interval. 'std::partition_point' returns the first + // such interval which does not meet that criterion, so we insert all + // up to that point. 
+ auto first_greater = std::partition_point( + overlapping_by_asc_left_.cbegin(), overlapping_by_asc_left_.cend(), + [&](const interval_type& interval) { + return Traits::compare(Traits::get_left(interval), Traits::get_right(query)) <= 0; + }); + results->insert(results->end(), overlapping_by_asc_left_.cbegin(), first_greater); + } else if (Traits::compare(Traits::get_left(query), split_point_) > 0) { + // The interval is fully right of the split point. So, it may not overlap + // with any in 'left_'. + if (right_ != NULL) { + right_->FindIntersectingInterval(query, results); + } + + // Any intervals whose right edge is >= the query interval's left edge + // intersect the query interval. 'std::partition_point' returns the first + // such interval which does not meet that criterion, so we insert all + // up to that point. + auto first_lesser = std::partition_point( + overlapping_by_desc_right_.cbegin(), overlapping_by_desc_right_.cend(), + [&](const interval_type& interval) { + return Traits::compare(Traits::get_right(interval), Traits::get_left(query)) >= 0; + }); + results->insert(results->end(), overlapping_by_desc_right_.cbegin(), first_lesser); + } else { + // The query interval contains the split point. Therefore all other intervals + // which also contain the split point are intersecting. + results->insert(results->end(), overlapping_by_asc_left_.begin(), + overlapping_by_asc_left_.end()); + + // The query interval may _also_ intersect some in either child. + if (left_ != NULL) { + left_->FindIntersectingInterval(query, results); + } + if (right_ != NULL) { + right_->FindIntersectingInterval(query, results); + } + } +} + + +} // namespace interval_tree_internal + +} // namespace kudu + +#endif