Remove key-normalizer*.h, which appear to be dead code at this point. Testing done: compiled Impala.
Change-Id: If890c2a2589148db38ea333a518dc1368dcd5459 Reviewed-on: http://gerrit.cloudera.org:8080/6377 Tested-by: Impala Public Jenkins Reviewed-by: Michael Ho <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/87e95f80 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/87e95f80 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/87e95f80 Branch: refs/heads/master Commit: 87e95f804e2760fea0407e9fc75f3966a6e51ac9 Parents: 5a333c4 Author: Michael Ho <[email protected]> Authored: Mon Mar 13 19:20:27 2017 -0700 Committer: Michael Ho <[email protected]> Committed: Wed Mar 15 05:11:15 2017 +0000 ---------------------------------------------------------------------- be/src/util/key-normalizer.h | 111 ------------------ be/src/util/key-normalizer.inline.h | 188 ------------------------------- 2 files changed, 299 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87e95f80/be/src/util/key-normalizer.h ---------------------------------------------------------------------- diff --git a/be/src/util/key-normalizer.h b/be/src/util/key-normalizer.h deleted file mode 100644 index 3c9c30f..0000000 --- a/be/src/util/key-normalizer.h +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef IMPALA_UTIL_KEY_NORMALIZER_H_ -#define IMPALA_UTIL_KEY_NORMALIZER_H_ - -#include "exprs/expr.h" - -namespace impala { - -/// Provides support for normalizing Impala expr values into a memcmp-able, -/// fixed-length format. -// -/// To normalize a key, we first write a null byte (0 if nulls_first, 1 otw), -/// followed by the normalized form of the key. We invert the bytes of the key (excluding -/// the null byte) if the key should be sorted in descending order. Further, for any -/// multi-byte data types, we ensure that the most significant byte is first by -/// converting to big endian. -// -/// In addition to inverting descending keys and converting to big endian, here is how -/// we normalize specific types: -/// Integers: -/// Invert the sign bit. -/// Floats: -/// Write out the inverted sign bit, followed by the exponent, followed by -/// the fraction. If the float is negative, though, we need to invert both the exponent -/// and fraction (since smaller number means greater actual value when negative). -/// Conveniently, IEEE floating point numbers are already in the correct order. -/// Timestamps: -/// 32 bits for date: 23 bits for year, 4 bits for month, and 5 bits for day. -/// 64 bits for time of day in nanoseconds. -/// All numbers assumed unsigned. -/// Strings: -/// Write one character at a time with a null byte at the end (inverted if -/// sort descending). Unlike other data types, we may write partial strings. -/// NOTE: This assumes strings do not contain null characters. -/// Booleans/Nulls: -/// Left as-is. 
-// -/// Finally, we pad any remaining bytes of the key with zeroes. -class KeyNormalizer { - public: - /// Initializes the normalizer with the key exprs and length alloted to each normalized - /// key. - KeyNormalizer(const std::vector<ExprContext*>& key_exprs_ctxs, int key_len, - const std::vector<bool>& is_asc, const std::vector<bool>& nulls_first) - : key_expr_ctxs_(key_expr_ctxs), key_len_(key_len), is_asc_(is_asc), - nulls_first_(nulls_first) { - } - - /// Normalizes all keys and writes the value into dst. - /// Returns true if we went over the max key size while writing the key. - /// If the return value is true, then key_idx_over_budget will be set to - /// the index of the key expr which went over. - /// TODO: Handle non-nullable columns - bool NormalizeKey(TupleRow* tuple_row, uint8_t* dst, int* key_idx_over_budget = NULL); - - private: - /// Returns true if we went over the max key size while writing the null bit. - static bool WriteNullBit(uint8_t null_bit, uint8_t* value, uint8_t* dst, - int* bytes_left); - - /// Stores the given value in the memory address given by dst, after - /// converting to big endian and inverting the value if the sort is descending. - /// Copy of 'value' intentional, we don't want to modify original. - template <typename ValueType> - static void StoreFinalValue(ValueType value, void* dst, bool is_asc); - - template <typename IntType> - static void NormalizeInt(void* src, void* dst, bool is_asc); - - /// ResultType should be an integer type of the same size as FloatType, used - /// to examine the bytes of the float. - template <typename FloatType, typename ResultType> - static void NormalizeFloat(void* src, void* dst, bool is_asc); - - static void NormalizeTimestamp(uint8_t* src, uint8_t* dst, bool is_asc); - - /// Normalizes a sort key value and writes it to dst. - /// Updates bytes_left and returns true if we went over the max key size. 
- static bool WriteNormalizedKey(const ColumnType& type, bool is_asc, - uint8_t* value, uint8_t* dst, int* bytes_left); - - /// Normalizes a column by writing a NULL byte and then the normalized value. - /// Updates bytes_left and returns true if we went over the max key size. - static bool NormalizeKeyColumn(const ColumnType& type, uint8_t null_bit, bool is_asc, - uint8_t* value, uint8_t* dst, int* bytes_left); - - std::vector<ExprContext*> key_expr_ctxs_; - int key_len_; - std::vector<bool> is_asc_; - std::vector<bool> nulls_first_; -}; - -} - -#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87e95f80/be/src/util/key-normalizer.inline.h ---------------------------------------------------------------------- diff --git a/be/src/util/key-normalizer.inline.h b/be/src/util/key-normalizer.inline.h deleted file mode 100644 index 761dd5c..0000000 --- a/be/src/util/key-normalizer.inline.h +++ /dev/null @@ -1,188 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#ifndef IMPALA_UTIL_KEY_NORMALIZER_INLINE_H_ -#define IMPALA_UTIL_KEY_NORMALIZER_INLINE_H_ - -#include "util/key-normalizer.h" - -#include <boost/date_time/gregorian/gregorian_types.hpp> - -#include "runtime/descriptors.h" -#include "runtime/string-value.h" -#include "runtime/timestamp-value.h" -#include "util/bit-util.h" - -namespace impala { - -inline bool KeyNormalizer::WriteNullBit(uint8_t null_bit, uint8_t* value, uint8_t* dst, - int* bytes_left) { - // If there's not enough space for the null byte, return. - if (*bytes_left < 1) return true; - *dst = (value == NULL ? null_bit : !null_bit); - --*bytes_left; - return false; -} - -template <typename ValueType> -inline void KeyNormalizer::StoreFinalValue(ValueType value, void* dst, bool is_asc) { - if (sizeof(ValueType) > 1) value = BitUtil::ToBigEndian(value); - if (!is_asc) value = ~value; - memcpy(dst, &value, sizeof(ValueType)); -} - -template <typename IntType> -inline void KeyNormalizer::NormalizeInt(void* src, void* dst, bool is_asc) { - const int num_bits = 8 * sizeof(IntType); - IntType sign_bit = (1LL << (num_bits - 1)); - - IntType value = *(reinterpret_cast<IntType*>(src)); - value = (sign_bit ^ value); - StoreFinalValue<IntType>(value, dst, is_asc); -} - -template <typename FloatType, typename ResultType> -inline void KeyNormalizer::NormalizeFloat(void* src, void* dst, bool is_asc) { - DCHECK_EQ(sizeof(FloatType), sizeof(ResultType)); - - const int num_bits = 8 * sizeof(FloatType); - const ResultType sign_bit = (1LL << (num_bits - 1)); - - ResultType value = *(reinterpret_cast<ResultType*>(src)); - if (value & sign_bit) { - // If the sign is negative, we'll end up inverting the whole thing. - value = ~value; - } else { - // Otherwise, just invert the sign bit. 
- value = (sign_bit ^ value); - } - StoreFinalValue<ResultType>(value, dst, is_asc); -} - -inline void KeyNormalizer::NormalizeTimestamp(uint8_t* src, uint8_t* dst, bool is_asc) { - TimestampValue timestamp = *(reinterpret_cast<TimestampValue*>(src)); - - // Need 5 bits for day and 4 bits for month. Rest given to year. - boost::gregorian::date::ymd_type ymd = timestamp.date().year_month_day(); - uint32_t date = ymd.day | (ymd.month << 5) | (ymd.year << 9); - StoreFinalValue<uint32_t>(date, dst, is_asc); - - // Write time of day in nanoseconds in the next slot. - uint64_t time_ns = timestamp.time_of_day().total_nanoseconds(); - StoreFinalValue<uint64_t>(time_ns, dst + sizeof(date), is_asc); -} - -inline bool KeyNormalizer::WriteNormalizedKey(const ColumnType& type, bool is_asc, - uint8_t* value, uint8_t* dst, int* bytes_left) { - // Expend bytes_left or fail if we don't have enough. - // Variable-length data types (i.e., strings) account for themselves. - int byte_size = type.GetByteSize(); - if (byte_size != 0) { - if (*bytes_left >= byte_size) { - *bytes_left -= byte_size; - } else { - return true; - } - } - - switch(type.type) { - case TYPE_BIGINT: - NormalizeInt<int64_t>(value, dst, is_asc); - break; - case TYPE_INT: - NormalizeInt<int32_t>(value, dst, is_asc); - break; - case TYPE_SMALLINT: - NormalizeInt<int16_t>(value, dst, is_asc); - break; - case TYPE_TINYINT: - NormalizeInt<int8_t>(value, dst, is_asc); - break; - - case TYPE_DOUBLE: - NormalizeFloat<double, uint64_t>(value, dst, is_asc); - break; - case TYPE_FLOAT: - NormalizeFloat<float, uint32_t>(value, dst, is_asc); - break; - - case TYPE_TIMESTAMP: - NormalizeTimestamp(value, dst, is_asc); - break; - - case TYPE_STRING: - case TYPE_VARCHAR: { - StringValue* string_val = reinterpret_cast<StringValue*>(value); - - // Copy the string over, with an additional NULL at the end. 
- int size = std::min(string_val->len, *bytes_left); - for (int i = 0; i < size; ++i) { - StoreFinalValue<uint8_t>(string_val->ptr[i], dst + i, is_asc); - } - *bytes_left -= size; - - if (*bytes_left == 0) return true; - - StoreFinalValue<uint8_t>(0, dst + size, is_asc); - --*bytes_left; - return false; - } - - case TYPE_BOOLEAN: - StoreFinalValue<uint8_t>(*reinterpret_cast<uint8_t*>(value), dst, is_asc); - break; - case TYPE_NULL: - StoreFinalValue<uint8_t>(0, dst, is_asc); - break; - default: - DCHECK(false) << "Value type not supported for normalization"; - } - - return false; -} - -inline bool KeyNormalizer::NormalizeKeyColumn(const ColumnType& type, uint8_t null_bit, - bool is_asc, uint8_t* value, uint8_t* dst, int* bytes_left) { - bool went_over = WriteNullBit(null_bit, value, dst, bytes_left); - if (went_over || value == NULL) return went_over; - return WriteNormalizedKey(type, is_asc, value, dst + 1, bytes_left); -} - -inline bool KeyNormalizer::NormalizeKey(TupleRow* row, uint8_t* dst, - int* key_idx_over_budget) { - int bytes_left = key_len_; - for (int i = 0; i < key_expr_ctxs_.size(); ++i) { - uint8_t* key = reinterpret_cast<uint8_t*>(key_expr_ctxs_[i]->GetValue(row)); - int offset = key_len_ - bytes_left; - bool went_over = NormalizeKeyColumn(key_expr_ctxs_[i]->root()->type(), - !nulls_first_[i], is_asc_[i], key, dst + offset, &bytes_left); - if (went_over) { - if (key_idx_over_budget != NULL) *key_idx_over_budget = i; - return true; - } - } - - // Zero out any unused bytes of the sort key. - int offset = key_len_ - bytes_left; - bzero(dst + offset, bytes_left); - - return false; -} - -} - -#endif
