Repository: incubator-impala Updated Branches: refs/heads/master fb8ea6a9c -> 7843b472f
IMPALA-5560: always store CHAR(N) inline in tuple This is done to simplify the CHAR(N) logic. I believe this is overall an improvement - any benefits of the out-of-line storage that motivated this optimisation originally were outweighed by the added complexity. This also avoids IMPALA-5559 (fe/be have different notions of var-len), which will unblock IMPALA-3200. Pros: * Reduce the number of code paths and improve test coverage. (e.g. avoids IMPALA-5559: fe/be have different notions of var-len) * Reduced memory to store non-NULL data (saves 12-byte StringValue) * Fewer branches in code -> save CPU cycles. * If CHAR(N) performance is important, reduced complexity makes it easier to implement codegen. Cons: * Requires N bytes to store a NULL value. * May hurt cache locality (although this is speculative in my mind). The change is mostly mechanical - I removed MAX_CHAR_INLINE_LENGTH and then removed branches that depended on that. Testing: Ran exhaustive build. Change-Id: I9c0b823ccff6b0c37f5267c548d096c29b8caac3 Reviewed-on: http://gerrit.cloudera.org:8080/7303 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/7843b472 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/7843b472 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/7843b472 Branch: refs/heads/master Commit: 7843b472f2fd04cd63dd5758aaf0cc082a7da025 Parents: fb8ea6a Author: Tim Armstrong <[email protected]> Authored: Thu Jun 22 10:45:14 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Fri Jun 30 22:49:40 2017 +0000 ---------------------------------------------------------------------- be/src/exec/hash-table.cc | 24 +++++++----- be/src/exec/hdfs-avro-scanner-test.cc | 10 +---- be/src/exec/hdfs-parquet-table-writer.cc | 2 +- be/src/exec/hdfs-text-table-writer.cc | 2 +- be/src/exec/parquet-column-readers.cc | 40 +++++++------------- be/src/exprs/agg-fn-evaluator.cc | 2 +- be/src/exprs/anyval-util.h | 23 +++++------ be/src/exprs/scalar-expr-evaluator.cc | 6 +-- be/src/exprs/slot-ref.cc | 3 +- be/src/runtime/raw-value-ir.cc | 4 +- be/src/runtime/raw-value.cc | 20 +++++----- be/src/runtime/raw-value.inline.h | 11 +++--- be/src/runtime/string-value.h | 6 --- be/src/runtime/string-value.inline.h | 23 ----------- be/src/runtime/types.h | 6 +-- be/src/service/fe-support.cc | 2 +- be/src/service/hs2-util.cc | 4 +- .../org/apache/impala/catalog/ScalarType.java | 5 --- .../queries/QueryTest/spilling.test | 6 +-- 19 files changed, 69 insertions(+), 130 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hash-table.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc index 0d456f6..a4856e9 100644 --- a/be/src/exec/hash-table.cc +++ b/be/src/exec/hash-table.cc @@ -69,15 +69,21 @@ static uint32_t SEED_PRIMES[] = { // We don't want(NULL, 1) to hash to the same as (0, 1). // This needs to be as big as the biggest primitive type since the bytes // get copied directly. -// TODO find a better approach, since primitives like CHAR(N) can be up to 128 bytes -static int64_t NULL_VALUE[] = { HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED, - HashUtil::FNV_SEED, HashUtil::FNV_SEED }; +// TODO find a better approach, since primitives like CHAR(N) can be up +// to 255 bytes +static int64_t NULL_VALUE[] = { + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, + HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED +}; + +static_assert(sizeof(NULL_VALUE) >= ColumnType::MAX_CHAR_LENGTH, + "NULL_VALUE must be at least as large as the largest possible slot"); // The first NUM_SMALL_BLOCKS of nodes_ are made of blocks less than the IO size (of 8MB) // to reduce the memory footprint of small queries. In particular, we always first use a http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hdfs-avro-scanner-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-avro-scanner-test.cc b/be/src/exec/hdfs-avro-scanner-test.cc index 3567d5c..9358de4 100644 --- a/be/src/exec/hdfs-avro-scanner-test.cc +++ b/be/src/exec/hdfs-avro-scanner-test.cc @@ -151,14 +151,8 @@ class HdfsAvroScannerTest : public testing::Test { char slot[max<int>(sizeof(StringValue), max_len)]; bool success = scanner_.ReadAvroChar(TYPE_CHAR, max_len, &new_data, data + data_len, true, slot, NULL); - StringValue value; - if (max_len <= ColumnType::MAX_CHAR_INLINE_LENGTH) { - // Convert to non-inlined string value for comparison. - value.len = max_len; - value.ptr = slot; - } else { - value = *reinterpret_cast<StringValue*>(slot); - } + // Convert to non-inlined string value for comparison. + StringValue value(slot, max_len); CheckReadResult(expected_val, expected_encoded_len, expected_error, value, success, new_data - data); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hdfs-parquet-table-writer.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-parquet-table-writer.cc b/be/src/exec/hdfs-parquet-table-writer.cc index cc62ef8..5a2d810 100644 --- a/be/src/exec/hdfs-parquet-table-writer.cc +++ b/be/src/exec/hdfs-parquet-table-writer.cc @@ -387,7 +387,7 @@ template<> inline StringValue* HdfsParquetTableWriter::ColumnWriter<StringValue>::CastValue( void* value) { if (type().type == TYPE_CHAR) { - temp_.ptr = StringValue::CharSlotToPtr(value, type()); + temp_.ptr = reinterpret_cast<char*>(value); temp_.len = StringValue::UnpaddedCharLength(temp_.ptr, type().len); return &temp_; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hdfs-text-table-writer.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-text-table-writer.cc b/be/src/exec/hdfs-text-table-writer.cc index 82db972..aaee773 100644 --- a/be/src/exec/hdfs-text-table-writer.cc +++ b/be/src/exec/hdfs-text-table-writer.cc @@ -128,7 +128,7 @@ Status HdfsTextTableWriter::AppendRows( if (value != NULL) { const ColumnType& type = output_expr_evals_[j]->root().type(); if (type.type == TYPE_CHAR) { - char* val_ptr = StringValue::CharSlotToPtr(value, type); + char* val_ptr = reinterpret_cast<char*>(value); StringValue sv(val_ptr, StringValue::UnpaddedCharLength(val_ptr, type.len)); PrintEscaped(&sv); } else if (type.IsVarLenStringType()) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/parquet-column-readers.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/parquet-column-readers.cc b/be/src/exec/parquet-column-readers.cc index 9d4a329..9f00762 100644 --- a/be/src/exec/parquet-column-readers.cc +++ b/be/src/exec/parquet-column-readers.cc @@ -498,7 +498,7 @@ class ScalarColumnReader : public BaseScalarColumnReader { return false; } if (UNLIKELY(NeedsConversionInline() && !tuple->IsNull(null_indicator_offset_) - && !ConvertSlot(val_ptr, reinterpret_cast<T*>(slot), pool))) { + && !ConvertSlot(val_ptr, slot, pool))) { return false; } return true; @@ -520,8 +520,8 @@ class ScalarColumnReader : public BaseScalarColumnReader { return false; } - /// Converts and writes src into dst based on desc_->type() - bool ConvertSlot(const T* src, T* dst, MemPool* pool) { + /// Converts and writes 'src' into 'slot' based on desc_->type() + bool ConvertSlot(const T* src, void* slot, MemPool* pool) { DCHECK(false); return false; } @@ -564,29 +564,14 @@ inline bool ScalarColumnReader<StringValue, true>::NeedsConversionInline() const template<> bool ScalarColumnReader<StringValue, true>::ConvertSlot( - const StringValue* src, StringValue* dst, MemPool* pool) { + const StringValue* src, void* slot, MemPool* pool) { DCHECK(slot_desc() != NULL); DCHECK(slot_desc()->type().type == TYPE_CHAR); - int len = slot_desc()->type().len; - StringValue sv; - sv.len = len; - if (slot_desc()->type().IsVarLenStringType()) { - sv.ptr = reinterpret_cast<char*>(pool->TryAllocate(len)); - if (UNLIKELY(sv.ptr == NULL)) { - string details = Substitute(PARQUET_COL_MEM_LIMIT_EXCEEDED, "ConvertSlot", - len, "StringValue"); - parent_->parse_status_ = - pool->mem_tracker()->MemLimitExceeded(parent_->state_, details, len); - return false; - } - } else { - sv.ptr = reinterpret_cast<char*>(dst); - } - int unpadded_len = min(len, src->len); - memcpy(sv.ptr, src->ptr, unpadded_len); - StringValue::PadWithSpaces(sv.ptr, len, unpadded_len); - - if (slot_desc()->type().IsVarLenStringType()) *dst = sv; + int char_len = slot_desc()->type().len; + int unpadded_len = min(char_len, src->len); + char* dst_char = reinterpret_cast<char*>(slot); + memcpy(dst_char, src->ptr, unpadded_len); + StringValue::PadWithSpaces(dst_char, char_len, unpadded_len); return true; } @@ -597,11 +582,12 @@ inline bool ScalarColumnReader<TimestampValue, true>::NeedsConversionInline() co template<> bool ScalarColumnReader<TimestampValue, true>::ConvertSlot( - const TimestampValue* src, TimestampValue* dst, MemPool* pool) { + const TimestampValue* src, void* slot, MemPool* pool) { // Conversion should only happen when this flag is enabled. DCHECK(FLAGS_convert_legacy_hive_parquet_utc_timestamps); - *dst = *src; - if (dst->HasDateAndTime()) dst->UtcToLocal(); + TimestampValue* dst_ts = reinterpret_cast<TimestampValue*>(slot); + *dst_ts = *src; + if (dst_ts->HasDateAndTime()) dst_ts->UtcToLocal(); return true; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/agg-fn-evaluator.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/agg-fn-evaluator.cc b/be/src/exprs/agg-fn-evaluator.cc index 910eef1..860c78d 100644 --- a/be/src/exprs/agg-fn-evaluator.cc +++ b/be/src/exprs/agg-fn-evaluator.cc @@ -279,7 +279,7 @@ void AggFnEvaluator::Init(Tuple* dst) { void* slot = dst->GetSlot(slot_desc.tuple_offset()); StringVal* sv = reinterpret_cast<StringVal*>(staging_intermediate_val_); sv->is_null = dst->IsNull(slot_desc.null_indicator_offset()); - sv->ptr = reinterpret_cast<uint8_t*>(StringValue::CharSlotToPtr(slot, type)); + sv->ptr = reinterpret_cast<uint8_t*>(slot); sv->len = type.len; } reinterpret_cast<InitFn>(agg_fn_.init_fn_)( http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/anyval-util.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/anyval-util.h b/be/src/exprs/anyval-util.h index a18786e..c908ced 100644 --- a/be/src/exprs/anyval-util.h +++ b/be/src/exprs/anyval-util.h @@ -279,22 +279,19 @@ class AnyValUtil { return; case TYPE_STRING: case TYPE_VARCHAR: - case TYPE_CHAR: { - if (type.IsVarLenStringType()) { - reinterpret_cast<const StringValue*>(slot)->ToStringVal( - reinterpret_cast<StringVal*>(dst)); - if (type.type == TYPE_VARCHAR) { - StringVal* sv = reinterpret_cast<StringVal*>(dst); - DCHECK_GE(type.len, 0); - DCHECK_LE(sv->len, type.len); - } - } else { - DCHECK_EQ(type.type, TYPE_CHAR); + reinterpret_cast<const StringValue*>(slot)->ToStringVal( + reinterpret_cast<StringVal*>(dst)); + if (type.type == TYPE_VARCHAR) { StringVal* sv = reinterpret_cast<StringVal*>(dst); - sv->ptr = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(slot)); - sv->len = type.len; + DCHECK_GE(type.len, 0); + DCHECK_LE(sv->len, type.len); } return; + case TYPE_CHAR: { + StringVal* sv = reinterpret_cast<StringVal*>(dst); + sv->ptr = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(slot)); + sv->len = type.len; + return; } case TYPE_TIMESTAMP: reinterpret_cast<const TimestampValue*>(slot)->ToTimestampVal( http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/scalar-expr-evaluator.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/scalar-expr-evaluator.cc b/be/src/exprs/scalar-expr-evaluator.cc index ae0aa15..6a401ef 100644 --- a/be/src/exprs/scalar-expr-evaluator.cc +++ b/be/src/exprs/scalar-expr-evaluator.cc @@ -325,11 +325,7 @@ void* ScalarExprEvaluator::GetValue(const ScalarExpr& expr, const TupleRow* row) if (v.is_null) return nullptr; result_.string_val.ptr = reinterpret_cast<char*>(v.ptr); result_.string_val.len = v.len; - if (expr.type_.IsVarLenStringType()) { - return &result_.string_val; - } else { - return result_.string_val.ptr; - } + return result_.string_val.ptr; } case TYPE_TIMESTAMP: { impala_udf::TimestampVal v = expr.GetTimestampVal(this, row); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/slot-ref.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/slot-ref.cc b/be/src/exprs/slot-ref.cc index 49931c3..5c2d552 100644 --- a/be/src/exprs/slot-ref.cc +++ b/be/src/exprs/slot-ref.cc @@ -428,8 +428,7 @@ StringVal SlotRef::GetStringVal( if (t == NULL || t->IsNull(null_indicator_offset_)) return StringVal::null(); StringVal result; if (type_.type == TYPE_CHAR) { - result.ptr = reinterpret_cast<uint8_t*>( - StringValue::CharSlotToPtr(t->GetSlot(slot_offset_), type_)); + result.ptr = reinterpret_cast<uint8_t*>(t->GetSlot(slot_offset_)); result.len = type_.len; } else { StringValue* sv = reinterpret_cast<StringValue*>(t->GetSlot(slot_offset_)); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/raw-value-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/raw-value-ir.cc b/be/src/runtime/raw-value-ir.cc index 1458b15..93b77f2 100644 --- a/be/src/runtime/raw-value-ir.cc +++ b/be/src/runtime/raw-value-ir.cc @@ -80,8 +80,8 @@ int IR_ALWAYS_INLINE RawValue::Compare( ts_value2 = reinterpret_cast<const TimestampValue*>(v2); return *ts_value1 > *ts_value2 ? 1 : (*ts_value1 < *ts_value2 ? -1 : 0); case TYPE_CHAR: { - const char* v1ptr = StringValue::CharSlotToPtr(v1, type); - const char* v2ptr = StringValue::CharSlotToPtr(v2, type); + const char* v1ptr = reinterpret_cast<const char*>(v1); + const char* v2ptr = reinterpret_cast<const char*>(v2); int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len); int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len); return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2)); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/raw-value.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc index b7b499d..bdcbb11 100644 --- a/be/src/runtime/raw-value.cc +++ b/be/src/runtime/raw-value.cc @@ -66,7 +66,7 @@ void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type, stream->write(chars, TimestampValue::Size()); break; case TYPE_CHAR: - stream->write(StringValue::CharSlotToPtr(chars, type), type.len); + stream->write(chars, type.len); break; case TYPE_DECIMAL: stream->write(chars, type.GetByteSize()); @@ -102,7 +102,7 @@ void RawValue::PrintValue(const void* value, const ColumnType& type, int scale, str->swap(tmp); return; case TYPE_CHAR: - *str = string(StringValue::CharSlotToPtr(value, type), type.len); + *str = string(reinterpret_cast<const char*>(value), type.len); return; default: PrintValue(value, type, scale, &out); @@ -146,13 +146,7 @@ void RawValue::Write(const void* value, void* dst, const ColumnType& type, *reinterpret_cast<const TimestampValue*>(value); break; case TYPE_STRING: - case TYPE_VARCHAR: - case TYPE_CHAR: { - if (!type.IsVarLenStringType()) { - DCHECK_EQ(type.type, TYPE_CHAR); - memcpy(StringValue::CharSlotToPtr(dst, type), value, type.len); - break; - } + case TYPE_VARCHAR: { const StringValue* src = reinterpret_cast<const StringValue*>(value); StringValue* dest = reinterpret_cast<StringValue*>(dst); dest->len = src->len; @@ -168,6 +162,10 @@ void RawValue::Write(const void* value, void* dst, const ColumnType& type, } break; } + case TYPE_CHAR: + DCHECK_EQ(type.type, TYPE_CHAR); + memcpy(dst, value, type.len); + break; case TYPE_DECIMAL: memcpy(dst, value, type.GetByteSize()); break; @@ -218,7 +216,7 @@ uint32_t RawValue::GetHashValueFnv(const void* v, const ColumnType& type, uint32 case TYPE_DOUBLE: return HashUtil::FnvHash64to32(v, 8, seed); case TYPE_TIMESTAMP: return HashUtil::FnvHash64to32(v, 12, seed); case TYPE_CHAR: - return HashUtil::FnvHash64to32(StringValue::CharSlotToPtr(v, type), type.len, seed); + return HashUtil::FnvHash64to32(v, type.len, seed); case TYPE_DECIMAL: return HashUtil::FnvHash64to32(v, type.GetByteSize(), seed); default: DCHECK(false); return 0; } @@ -288,7 +286,7 @@ void RawValue::PrintValue( *stream << *reinterpret_cast<const TimestampValue*>(value); break; case TYPE_CHAR: - stream->write(StringValue::CharSlotToPtr(value, type), type.len); + stream->write(reinterpret_cast<const char*>(value), type.len); break; case TYPE_DECIMAL: switch (type.GetByteSize()) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/raw-value.inline.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/raw-value.inline.h b/be/src/runtime/raw-value.inline.h index 1214bad..03c3699 100644 --- a/be/src/runtime/raw-value.inline.h +++ b/be/src/runtime/raw-value.inline.h @@ -73,8 +73,8 @@ inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type) return *reinterpret_cast<const TimestampValue*>(v1) == *reinterpret_cast<const TimestampValue*>(v2); case TYPE_CHAR: { - const char* v1ptr = StringValue::CharSlotToPtr(v1, type); - const char* v2ptr = StringValue::CharSlotToPtr(v2, type); + const char* v1ptr = reinterpret_cast<const char*>(v1); + const char* v2ptr = reinterpret_cast<const char*>(v2); int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len); int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len); return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2)) == 0; @@ -160,9 +160,10 @@ inline uint32_t RawValue::GetHashValueNonNull<impala::StringValue>( const impala::StringValue* v,const ColumnType& type, uint32_t seed) { DCHECK(v != NULL); if (type.type == TYPE_CHAR) { - return HashUtil::MurmurHash2_64( - StringValue::CharSlotToPtr(reinterpret_cast<const void*>(v), type), type.len, - seed); + // This is a inlined CHAR(n) slot. + // TODO: this is a bit wonky since it's not really a StringValue*. Handle CHAR(n) + // in a separate function. + return HashUtil::MurmurHash2_64(v, type.len, seed); } else { DCHECK(type.type == TYPE_STRING || type.type == TYPE_VARCHAR); if (v->len == 0) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/string-value.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/string-value.h b/be/src/runtime/string-value.h index 3fce865..5b7a7e4 100644 --- a/be/src/runtime/string-value.h +++ b/be/src/runtime/string-value.h @@ -121,12 +121,6 @@ struct StringValue { /// Returns number of characters in a char array (ignores trailing spaces) inline static int64_t UnpaddedCharLength(const char* cptr, int64_t len); - /// Converts a char slot to a pointer to the char string. - /// The slot should be a StringValue* or a char*, determined by type.IsVarLenStringType() - /// Returns NULL if the slot is null. - inline static char* CharSlotToPtr(void* slot, const ColumnType& type); - inline static const char* CharSlotToPtr(const void* slot, const ColumnType& type); - /// For C++/IR interop, we need to be able to look up types by name. static const char* LLVM_CLASS_NAME; }; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/string-value.inline.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/string-value.inline.h b/be/src/runtime/string-value.inline.h index e6db93f..d76baee 100644 --- a/be/src/runtime/string-value.inline.h +++ b/be/src/runtime/string-value.inline.h @@ -141,28 +141,5 @@ inline int64_t StringValue::UnpaddedCharLength(const char* cptr, int64_t len) { while (last >= 0 && cptr[last] == ' ') --last; return last + 1; } - -inline char* StringValue::CharSlotToPtr(void* slot, const ColumnType& type) { - DCHECK(type.type == TYPE_CHAR); - if (slot == NULL) return NULL; - if (type.IsVarLenStringType()) { - StringValue* sv = reinterpret_cast<StringValue*>(slot); - DCHECK_EQ(sv->len, type.len); - return sv->ptr; - } - return reinterpret_cast<char*>(slot); -} - -inline const char* StringValue::CharSlotToPtr(const void* slot, const ColumnType& type) { - DCHECK(type.type == TYPE_CHAR); - if (slot == NULL) return NULL; - if (type.IsVarLenStringType()) { - const StringValue* sv = reinterpret_cast<const StringValue*>(slot); - DCHECK_EQ(sv->len, type.len); - return sv->ptr; - } - return reinterpret_cast<const char*>(slot); -} - } #endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/types.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index 0657e5f..4961db2 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -76,7 +76,6 @@ struct ColumnType { int len; static const int MAX_VARCHAR_LENGTH = (1 << 16) - 1; // 65535 static const int MAX_CHAR_LENGTH = (1 << 8) - 1; // 255 - static const int MAX_CHAR_INLINE_LENGTH = (1 << 7); // 128 /// Only set if type == TYPE_DECIMAL int precision, scale; @@ -199,8 +198,7 @@ struct ColumnType { inline bool IsTimestampType() const { return type == TYPE_TIMESTAMP; } inline bool IsVarLenStringType() const { - return type == TYPE_STRING || type == TYPE_VARCHAR - || (type == TYPE_CHAR && len > MAX_CHAR_INLINE_LENGTH); + return type == TYPE_STRING || type == TYPE_VARCHAR; } inline bool IsComplexType() const { @@ -224,7 +222,6 @@ struct ColumnType { case TYPE_VARCHAR: return 0; case TYPE_CHAR: - if (IsVarLenStringType()) return 0; return len; case TYPE_NULL: case TYPE_BOOLEAN: @@ -258,7 +255,6 @@ struct ColumnType { case TYPE_VARCHAR: return 16; case TYPE_CHAR: - if (IsVarLenStringType()) return 16; return len; case TYPE_ARRAY: case TYPE_MAP: http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/service/fe-support.cc ---------------------------------------------------------------------- diff --git a/be/src/service/fe-support.cc b/be/src/service/fe-support.cc index 44976ec..0bc6285 100644 --- a/be/src/service/fe-support.cc +++ b/be/src/service/fe-support.cc @@ -132,7 +132,7 @@ static void SetTColumnValue( break; } case TYPE_CHAR: - tmp.assign(StringValue::CharSlotToPtr(value, type), type.len); + tmp.assign(reinterpret_cast<const char*>(value), type.len); col_val->binary_val.swap(tmp); col_val->__isset.binary_val = true; break; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/service/hs2-util.cc ---------------------------------------------------------------------- diff --git a/be/src/service/hs2-util.cc b/be/src/service/hs2-util.cc index 7789b26..b856556 100644 --- a/be/src/service/hs2-util.cc +++ b/be/src/service/hs2-util.cc @@ -182,7 +182,7 @@ void impala::ExprValueToHS2TColumn(const void* value, const TColumnType& type, if (value != NULL) { ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len); column->stringVal.values.back().assign( - StringValue::CharSlotToPtr(value, char_type), char_type.len); + reinterpret_cast<const char*>(value), char_type.len); } nulls = &column->stringVal.nulls; break; @@ -346,7 +346,7 @@ void impala::ExprValueToHS2TColumnValue(const void* value, const TColumnType& ty if (not_null) { ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len); hs2_col_val->stringVal.value.assign( - StringValue::CharSlotToPtr(value, char_type), char_type.len); + reinterpret_cast<const char*>(value), char_type.len); } break; case TPrimitiveType::TIMESTAMP: http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/fe/src/main/java/org/apache/impala/catalog/ScalarType.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java index f43cf6c..ea51a3e 100644 --- a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java +++ b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java @@ -59,10 +59,6 @@ public class ScalarType extends Type { public static final int MAX_VARCHAR_LENGTH = (1 << 16) - 1; // 65535 public static final int MAX_CHAR_LENGTH = (1 << 8) - 1; // 255 - // Longest CHAR that we in line in the tuple. - // Keep consistent with backend ColumnType::CHAR_INLINE_LENGTH - public static final int CHAR_INLINE_LENGTH = (1 << 7); // 128 - // Hive, mysql, sql server standard. public static final int MAX_PRECISION = 38; public static final int MAX_SCALE = MAX_PRECISION; @@ -311,7 +307,6 @@ public class ScalarType extends Type { public int getSlotSize() { switch (type_) { case CHAR: - if (len_ > CHAR_INLINE_LENGTH || len_ == 0) return STRING.getSlotSize(); return len_; case DECIMAL: return TypesUtil.getDecimalSlotSize(this); default: http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/testdata/workloads/functional-query/queries/QueryTest/spilling.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling.test b/testdata/workloads/functional-query/queries/QueryTest/spilling.test index a08de58..0f0e2ca 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test +++ b/testdata/workloads/functional-query/queries/QueryTest/spilling.test @@ -391,7 +391,7 @@ BIGINT row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\) ==== ---- QUERY -# Test sort with inlined char column materialized by exprs. +# Test sort with small char column materialized by exprs. # Set low memory limit to force spilling. # IMPALA-3332: comparator makes local allocations that cause runaway memory consumption. set num_nodes=0; @@ -431,7 +431,7 @@ row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) ==== ---- QUERY -# Test sort with input inlined char column materialized before sort. +# Test sort with small input char column materialized before sort. set num_nodes=0; set mem_limit=200m; set max_block_mgr_memory=4m; @@ -470,7 +470,7 @@ row_regex: .*SpilledRuns: .* \([1-9][0-9]*\) row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\) ==== ---- QUERY -# Test sort with input non-inlined char column materialized before sort. +# Test sort with large input char column materialized before sort. # Set low memory limit to force spilling. set num_nodes=0; set mem_limit=200m;
