Repository: incubator-impala
Updated Branches:
  refs/heads/master fb8ea6a9c -> 7843b472f


IMPALA-5560: always store CHAR(N) inline in tuple

This is done to simplify the CHAR(N) logic. I believe this is overall an
improvement - any benefits of the out-of-line storage that motivated
this optimisation originally were outweighed by the added complexity.

This also avoids IMPALA-5559 (fe/be have different notions of var-len),
which will unblock IMPALA-3200.

Pros:
* Reduce the number of code paths and improve test coverage.
  (e.g. avoids IMPALA-5559: fe/be have different notions of var-len)
* Reduced memory to store non-NULL data (saves 12-byte StringValue)
* Fewer branches in code -> save CPU cycles.
* If CHAR(N) performance is important, reduced complexity makes it
  easier to implement codegen.

Cons:
* Requires N bytes to store a NULL value.
* May hurt cache locality (although this is speculative in my mind).

The change is mostly mechanical - I removed MAX_CHAR_INLINE_LENGTH
and then removed branches that depended on that.

Testing:
Ran exhaustive build.

Change-Id: I9c0b823ccff6b0c37f5267c548d096c29b8caac3
Reviewed-on: http://gerrit.cloudera.org:8080/7303
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/7843b472
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/7843b472
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/7843b472

Branch: refs/heads/master
Commit: 7843b472f2fd04cd63dd5758aaf0cc082a7da025
Parents: fb8ea6a
Author: Tim Armstrong <[email protected]>
Authored: Thu Jun 22 10:45:14 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Fri Jun 30 22:49:40 2017 +0000

----------------------------------------------------------------------
 be/src/exec/hash-table.cc                       | 24 +++++++-----
 be/src/exec/hdfs-avro-scanner-test.cc           | 10 +----
 be/src/exec/hdfs-parquet-table-writer.cc        |  2 +-
 be/src/exec/hdfs-text-table-writer.cc           |  2 +-
 be/src/exec/parquet-column-readers.cc           | 40 +++++++-------------
 be/src/exprs/agg-fn-evaluator.cc                |  2 +-
 be/src/exprs/anyval-util.h                      | 23 +++++------
 be/src/exprs/scalar-expr-evaluator.cc           |  6 +--
 be/src/exprs/slot-ref.cc                        |  3 +-
 be/src/runtime/raw-value-ir.cc                  |  4 +-
 be/src/runtime/raw-value.cc                     | 20 +++++-----
 be/src/runtime/raw-value.inline.h               | 11 +++---
 be/src/runtime/string-value.h                   |  6 ---
 be/src/runtime/string-value.inline.h            | 23 -----------
 be/src/runtime/types.h                          |  6 +--
 be/src/service/fe-support.cc                    |  2 +-
 be/src/service/hs2-util.cc                      |  4 +-
 .../org/apache/impala/catalog/ScalarType.java   |  5 ---
 .../queries/QueryTest/spilling.test             |  6 +--
 19 files changed, 69 insertions(+), 130 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index 0d456f6..a4856e9 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -69,15 +69,21 @@ static uint32_t SEED_PRIMES[] = {
 // We don't want(NULL, 1) to hash to the same as (0, 1).
 // This needs to be as big as the biggest primitive type since the bytes
 // get copied directly.
-// TODO find a better approach, since primitives like CHAR(N) can be up to 128 
bytes
-static int64_t NULL_VALUE[] = { HashUtil::FNV_SEED, HashUtil::FNV_SEED,
-                                HashUtil::FNV_SEED, HashUtil::FNV_SEED,
-                                HashUtil::FNV_SEED, HashUtil::FNV_SEED,
-                                HashUtil::FNV_SEED, HashUtil::FNV_SEED,
-                                HashUtil::FNV_SEED, HashUtil::FNV_SEED,
-                                HashUtil::FNV_SEED, HashUtil::FNV_SEED,
-                                HashUtil::FNV_SEED, HashUtil::FNV_SEED,
-                                HashUtil::FNV_SEED, HashUtil::FNV_SEED };
+// TODO find a better approach, since primitives like CHAR(N) can be up
+// to 255 bytes
+static int64_t NULL_VALUE[] = {
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED,
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED,
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED,
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED,
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED,
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED,
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED,
+  HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, 
HashUtil::FNV_SEED
+};
+
+static_assert(sizeof(NULL_VALUE) >= ColumnType::MAX_CHAR_LENGTH,
+    "NULL_VALUE must be at least as large as the largest possible slot");
 
 // The first NUM_SMALL_BLOCKS of nodes_ are made of blocks less than the IO 
size (of 8MB)
 // to reduce the memory footprint of small queries. In particular, we always 
first use a

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hdfs-avro-scanner-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-avro-scanner-test.cc 
b/be/src/exec/hdfs-avro-scanner-test.cc
index 3567d5c..9358de4 100644
--- a/be/src/exec/hdfs-avro-scanner-test.cc
+++ b/be/src/exec/hdfs-avro-scanner-test.cc
@@ -151,14 +151,8 @@ class HdfsAvroScannerTest : public testing::Test {
     char slot[max<int>(sizeof(StringValue), max_len)];
     bool success = scanner_.ReadAvroChar(TYPE_CHAR, max_len, &new_data, data + 
data_len,
         true, slot, NULL);
-    StringValue value;
-    if (max_len <= ColumnType::MAX_CHAR_INLINE_LENGTH) {
-      // Convert to non-inlined string value for comparison.
-      value.len = max_len;
-      value.ptr = slot;
-    } else {
-      value = *reinterpret_cast<StringValue*>(slot);
-    }
+    // Convert to non-inlined string value for comparison.
+    StringValue value(slot, max_len);
     CheckReadResult(expected_val, expected_encoded_len, expected_error, value, 
success,
         new_data - data);
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hdfs-parquet-table-writer.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-table-writer.cc 
b/be/src/exec/hdfs-parquet-table-writer.cc
index cc62ef8..5a2d810 100644
--- a/be/src/exec/hdfs-parquet-table-writer.cc
+++ b/be/src/exec/hdfs-parquet-table-writer.cc
@@ -387,7 +387,7 @@ template<>
 inline StringValue* 
HdfsParquetTableWriter::ColumnWriter<StringValue>::CastValue(
     void* value) {
   if (type().type == TYPE_CHAR) {
-    temp_.ptr = StringValue::CharSlotToPtr(value, type());
+    temp_.ptr = reinterpret_cast<char*>(value);
     temp_.len = StringValue::UnpaddedCharLength(temp_.ptr, type().len);
     return &temp_;
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/hdfs-text-table-writer.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-table-writer.cc 
b/be/src/exec/hdfs-text-table-writer.cc
index 82db972..aaee773 100644
--- a/be/src/exec/hdfs-text-table-writer.cc
+++ b/be/src/exec/hdfs-text-table-writer.cc
@@ -128,7 +128,7 @@ Status HdfsTextTableWriter::AppendRows(
         if (value != NULL) {
           const ColumnType& type = output_expr_evals_[j]->root().type();
           if (type.type == TYPE_CHAR) {
-            char* val_ptr = StringValue::CharSlotToPtr(value, type);
+            char* val_ptr = reinterpret_cast<char*>(value);
             StringValue sv(val_ptr, StringValue::UnpaddedCharLength(val_ptr, 
type.len));
             PrintEscaped(&sv);
           } else if (type.IsVarLenStringType()) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exec/parquet-column-readers.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/parquet-column-readers.cc 
b/be/src/exec/parquet-column-readers.cc
index 9d4a329..9f00762 100644
--- a/be/src/exec/parquet-column-readers.cc
+++ b/be/src/exec/parquet-column-readers.cc
@@ -498,7 +498,7 @@ class ScalarColumnReader : public BaseScalarColumnReader {
       return false;
     }
     if (UNLIKELY(NeedsConversionInline() && 
!tuple->IsNull(null_indicator_offset_)
-            && !ConvertSlot(val_ptr, reinterpret_cast<T*>(slot), pool))) {
+            && !ConvertSlot(val_ptr, slot, pool))) {
       return false;
     }
     return true;
@@ -520,8 +520,8 @@ class ScalarColumnReader : public BaseScalarColumnReader {
     return false;
   }
 
-  /// Converts and writes src into dst based on desc_->type()
-  bool ConvertSlot(const T* src, T* dst, MemPool* pool) {
+  /// Converts and writes 'src' into 'slot' based on desc_->type()
+  bool ConvertSlot(const T* src, void* slot, MemPool* pool) {
     DCHECK(false);
     return false;
   }
@@ -564,29 +564,14 @@ inline bool ScalarColumnReader<StringValue, 
true>::NeedsConversionInline() const
 
 template<>
 bool ScalarColumnReader<StringValue, true>::ConvertSlot(
-    const StringValue* src, StringValue* dst, MemPool* pool) {
+    const StringValue* src, void* slot, MemPool* pool) {
   DCHECK(slot_desc() != NULL);
   DCHECK(slot_desc()->type().type == TYPE_CHAR);
-  int len = slot_desc()->type().len;
-  StringValue sv;
-  sv.len = len;
-  if (slot_desc()->type().IsVarLenStringType()) {
-    sv.ptr = reinterpret_cast<char*>(pool->TryAllocate(len));
-    if (UNLIKELY(sv.ptr == NULL)) {
-      string details = Substitute(PARQUET_COL_MEM_LIMIT_EXCEEDED, 
"ConvertSlot",
-          len, "StringValue");
-      parent_->parse_status_ =
-          pool->mem_tracker()->MemLimitExceeded(parent_->state_, details, len);
-      return false;
-    }
-  } else {
-    sv.ptr = reinterpret_cast<char*>(dst);
-  }
-  int unpadded_len = min(len, src->len);
-  memcpy(sv.ptr, src->ptr, unpadded_len);
-  StringValue::PadWithSpaces(sv.ptr, len, unpadded_len);
-
-  if (slot_desc()->type().IsVarLenStringType()) *dst = sv;
+  int char_len = slot_desc()->type().len;
+  int unpadded_len = min(char_len, src->len);
+  char* dst_char = reinterpret_cast<char*>(slot);
+  memcpy(dst_char, src->ptr, unpadded_len);
+  StringValue::PadWithSpaces(dst_char, char_len, unpadded_len);
   return true;
 }
 
@@ -597,11 +582,12 @@ inline bool ScalarColumnReader<TimestampValue, 
true>::NeedsConversionInline() co
 
 template<>
 bool ScalarColumnReader<TimestampValue, true>::ConvertSlot(
-    const TimestampValue* src, TimestampValue* dst, MemPool* pool) {
+    const TimestampValue* src, void* slot, MemPool* pool) {
   // Conversion should only happen when this flag is enabled.
   DCHECK(FLAGS_convert_legacy_hive_parquet_utc_timestamps);
-  *dst = *src;
-  if (dst->HasDateAndTime()) dst->UtcToLocal();
+  TimestampValue* dst_ts = reinterpret_cast<TimestampValue*>(slot);
+  *dst_ts = *src;
+  if (dst_ts->HasDateAndTime()) dst_ts->UtcToLocal();
   return true;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/agg-fn-evaluator.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/agg-fn-evaluator.cc b/be/src/exprs/agg-fn-evaluator.cc
index 910eef1..860c78d 100644
--- a/be/src/exprs/agg-fn-evaluator.cc
+++ b/be/src/exprs/agg-fn-evaluator.cc
@@ -279,7 +279,7 @@ void AggFnEvaluator::Init(Tuple* dst) {
     void* slot = dst->GetSlot(slot_desc.tuple_offset());
     StringVal* sv = reinterpret_cast<StringVal*>(staging_intermediate_val_);
     sv->is_null = dst->IsNull(slot_desc.null_indicator_offset());
-    sv->ptr = reinterpret_cast<uint8_t*>(StringValue::CharSlotToPtr(slot, 
type));
+    sv->ptr = reinterpret_cast<uint8_t*>(slot);
     sv->len = type.len;
   }
   reinterpret_cast<InitFn>(agg_fn_.init_fn_)(

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/anyval-util.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/anyval-util.h b/be/src/exprs/anyval-util.h
index a18786e..c908ced 100644
--- a/be/src/exprs/anyval-util.h
+++ b/be/src/exprs/anyval-util.h
@@ -279,22 +279,19 @@ class AnyValUtil {
         return;
       case TYPE_STRING:
       case TYPE_VARCHAR:
-      case TYPE_CHAR: {
-        if (type.IsVarLenStringType()) {
-          reinterpret_cast<const StringValue*>(slot)->ToStringVal(
-              reinterpret_cast<StringVal*>(dst));
-          if (type.type == TYPE_VARCHAR) {
-            StringVal* sv = reinterpret_cast<StringVal*>(dst);
-            DCHECK_GE(type.len, 0);
-            DCHECK_LE(sv->len, type.len);
-          }
-        } else {
-          DCHECK_EQ(type.type, TYPE_CHAR);
+        reinterpret_cast<const StringValue*>(slot)->ToStringVal(
+            reinterpret_cast<StringVal*>(dst));
+        if (type.type == TYPE_VARCHAR) {
           StringVal* sv = reinterpret_cast<StringVal*>(dst);
-          sv->ptr = const_cast<uint8_t*>(reinterpret_cast<const 
uint8_t*>(slot));
-          sv->len = type.len;
+          DCHECK_GE(type.len, 0);
+          DCHECK_LE(sv->len, type.len);
         }
         return;
+      case TYPE_CHAR: {
+        StringVal* sv = reinterpret_cast<StringVal*>(dst);
+        sv->ptr = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(slot));
+        sv->len = type.len;
+        return;
       }
       case TYPE_TIMESTAMP:
         reinterpret_cast<const TimestampValue*>(slot)->ToTimestampVal(

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/scalar-expr-evaluator.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-expr-evaluator.cc 
b/be/src/exprs/scalar-expr-evaluator.cc
index ae0aa15..6a401ef 100644
--- a/be/src/exprs/scalar-expr-evaluator.cc
+++ b/be/src/exprs/scalar-expr-evaluator.cc
@@ -325,11 +325,7 @@ void* ScalarExprEvaluator::GetValue(const ScalarExpr& 
expr, const TupleRow* row)
       if (v.is_null) return nullptr;
       result_.string_val.ptr = reinterpret_cast<char*>(v.ptr);
       result_.string_val.len = v.len;
-      if (expr.type_.IsVarLenStringType()) {
-        return &result_.string_val;
-      } else {
-        return result_.string_val.ptr;
-      }
+      return result_.string_val.ptr;
     }
     case TYPE_TIMESTAMP: {
       impala_udf::TimestampVal v = expr.GetTimestampVal(this, row);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/exprs/slot-ref.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/slot-ref.cc b/be/src/exprs/slot-ref.cc
index 49931c3..5c2d552 100644
--- a/be/src/exprs/slot-ref.cc
+++ b/be/src/exprs/slot-ref.cc
@@ -428,8 +428,7 @@ StringVal SlotRef::GetStringVal(
   if (t == NULL || t->IsNull(null_indicator_offset_)) return StringVal::null();
   StringVal result;
   if (type_.type == TYPE_CHAR) {
-    result.ptr = reinterpret_cast<uint8_t*>(
-        StringValue::CharSlotToPtr(t->GetSlot(slot_offset_), type_));
+    result.ptr = reinterpret_cast<uint8_t*>(t->GetSlot(slot_offset_));
     result.len = type_.len;
   } else {
     StringValue* sv = reinterpret_cast<StringValue*>(t->GetSlot(slot_offset_));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/raw-value-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value-ir.cc b/be/src/runtime/raw-value-ir.cc
index 1458b15..93b77f2 100644
--- a/be/src/runtime/raw-value-ir.cc
+++ b/be/src/runtime/raw-value-ir.cc
@@ -80,8 +80,8 @@ int IR_ALWAYS_INLINE RawValue::Compare(
       ts_value2 = reinterpret_cast<const TimestampValue*>(v2);
       return *ts_value1 > *ts_value2 ? 1 : (*ts_value1 < *ts_value2 ? -1 : 0);
     case TYPE_CHAR: {
-      const char* v1ptr = StringValue::CharSlotToPtr(v1, type);
-      const char* v2ptr = StringValue::CharSlotToPtr(v2, type);
+      const char* v1ptr = reinterpret_cast<const char*>(v1);
+      const char* v2ptr = reinterpret_cast<const char*>(v2);
       int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len);
       int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len);
       return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/raw-value.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc
index b7b499d..bdcbb11 100644
--- a/be/src/runtime/raw-value.cc
+++ b/be/src/runtime/raw-value.cc
@@ -66,7 +66,7 @@ void RawValue::PrintValueAsBytes(const void* value, const 
ColumnType& type,
       stream->write(chars, TimestampValue::Size());
       break;
     case TYPE_CHAR:
-      stream->write(StringValue::CharSlotToPtr(chars, type), type.len);
+      stream->write(chars, type.len);
       break;
     case TYPE_DECIMAL:
       stream->write(chars, type.GetByteSize());
@@ -102,7 +102,7 @@ void RawValue::PrintValue(const void* value, const 
ColumnType& type, int scale,
       str->swap(tmp);
       return;
     case TYPE_CHAR:
-      *str = string(StringValue::CharSlotToPtr(value, type), type.len);
+      *str = string(reinterpret_cast<const char*>(value), type.len);
       return;
     default:
       PrintValue(value, type, scale, &out);
@@ -146,13 +146,7 @@ void RawValue::Write(const void* value, void* dst, const 
ColumnType& type,
           *reinterpret_cast<const TimestampValue*>(value);
       break;
     case TYPE_STRING:
-    case TYPE_VARCHAR:
-    case TYPE_CHAR: {
-      if (!type.IsVarLenStringType()) {
-        DCHECK_EQ(type.type, TYPE_CHAR);
-        memcpy(StringValue::CharSlotToPtr(dst, type), value, type.len);
-        break;
-      }
+    case TYPE_VARCHAR: {
       const StringValue* src = reinterpret_cast<const StringValue*>(value);
       StringValue* dest = reinterpret_cast<StringValue*>(dst);
       dest->len = src->len;
@@ -168,6 +162,10 @@ void RawValue::Write(const void* value, void* dst, const 
ColumnType& type,
       }
       break;
     }
+    case TYPE_CHAR:
+      DCHECK_EQ(type.type, TYPE_CHAR);
+      memcpy(dst, value, type.len);
+      break;
     case TYPE_DECIMAL:
       memcpy(dst, value, type.GetByteSize());
       break;
@@ -218,7 +216,7 @@ uint32_t RawValue::GetHashValueFnv(const void* v, const 
ColumnType& type, uint32
     case TYPE_DOUBLE: return HashUtil::FnvHash64to32(v, 8, seed);
     case TYPE_TIMESTAMP: return HashUtil::FnvHash64to32(v, 12, seed);
     case TYPE_CHAR:
-      return HashUtil::FnvHash64to32(StringValue::CharSlotToPtr(v, type), 
type.len, seed);
+      return HashUtil::FnvHash64to32(v, type.len, seed);
     case TYPE_DECIMAL: return HashUtil::FnvHash64to32(v, type.GetByteSize(), 
seed);
     default: DCHECK(false); return 0;
   }
@@ -288,7 +286,7 @@ void RawValue::PrintValue(
       *stream << *reinterpret_cast<const TimestampValue*>(value);
       break;
     case TYPE_CHAR:
-      stream->write(StringValue::CharSlotToPtr(value, type), type.len);
+      stream->write(reinterpret_cast<const char*>(value), type.len);
       break;
     case TYPE_DECIMAL:
       switch (type.GetByteSize()) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/raw-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.inline.h 
b/be/src/runtime/raw-value.inline.h
index 1214bad..03c3699 100644
--- a/be/src/runtime/raw-value.inline.h
+++ b/be/src/runtime/raw-value.inline.h
@@ -73,8 +73,8 @@ inline bool RawValue::Eq(const void* v1, const void* v2, 
const ColumnType& type)
       return *reinterpret_cast<const TimestampValue*>(v1) ==
           *reinterpret_cast<const TimestampValue*>(v2);
     case TYPE_CHAR: {
-      const char* v1ptr = StringValue::CharSlotToPtr(v1, type);
-      const char* v2ptr = StringValue::CharSlotToPtr(v2, type);
+      const char* v1ptr = reinterpret_cast<const char*>(v1);
+      const char* v2ptr = reinterpret_cast<const char*>(v2);
       int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len);
       int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len);
       return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2)) == 0;
@@ -160,9 +160,10 @@ inline uint32_t 
RawValue::GetHashValueNonNull<impala::StringValue>(
     const impala::StringValue* v,const ColumnType& type, uint32_t seed) {
   DCHECK(v != NULL);
   if (type.type == TYPE_CHAR) {
-    return HashUtil::MurmurHash2_64(
-        StringValue::CharSlotToPtr(reinterpret_cast<const void*>(v), type), 
type.len,
-        seed);
+    // This is a inlined CHAR(n) slot.
+    // TODO: this is a bit wonky since it's not really a StringValue*. Handle 
CHAR(n)
+    // in a separate function.
+    return HashUtil::MurmurHash2_64(v, type.len, seed);
   } else {
     DCHECK(type.type == TYPE_STRING || type.type == TYPE_VARCHAR);
     if (v->len == 0) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/string-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/string-value.h b/be/src/runtime/string-value.h
index 3fce865..5b7a7e4 100644
--- a/be/src/runtime/string-value.h
+++ b/be/src/runtime/string-value.h
@@ -121,12 +121,6 @@ struct StringValue {
   /// Returns number of characters in a char array (ignores trailing spaces)
   inline static int64_t UnpaddedCharLength(const char* cptr, int64_t len);
 
-  /// Converts a char slot to a pointer to the char string.
-  /// The slot should be a StringValue* or a char*, determined by 
type.IsVarLenStringType()
-  /// Returns NULL if the slot is null.
-  inline static char* CharSlotToPtr(void* slot, const ColumnType& type);
-  inline static const char* CharSlotToPtr(const void* slot, const ColumnType& 
type);
-
   /// For C++/IR interop, we need to be able to look up types by name.
   static const char* LLVM_CLASS_NAME;
 };

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/string-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/string-value.inline.h 
b/be/src/runtime/string-value.inline.h
index e6db93f..d76baee 100644
--- a/be/src/runtime/string-value.inline.h
+++ b/be/src/runtime/string-value.inline.h
@@ -141,28 +141,5 @@ inline int64_t StringValue::UnpaddedCharLength(const char* 
cptr, int64_t len) {
   while (last >= 0 && cptr[last] == ' ') --last;
   return last + 1;
 }
-
-inline char* StringValue::CharSlotToPtr(void* slot, const ColumnType& type) {
-  DCHECK(type.type == TYPE_CHAR);
-  if (slot == NULL) return NULL;
-  if (type.IsVarLenStringType()) {
-    StringValue* sv = reinterpret_cast<StringValue*>(slot);
-    DCHECK_EQ(sv->len, type.len);
-    return sv->ptr;
-  }
-  return reinterpret_cast<char*>(slot);
-}
-
-inline const char* StringValue::CharSlotToPtr(const void* slot, const 
ColumnType& type) {
-  DCHECK(type.type == TYPE_CHAR);
-  if (slot == NULL) return NULL;
-  if (type.IsVarLenStringType()) {
-    const StringValue* sv = reinterpret_cast<const StringValue*>(slot);
-    DCHECK_EQ(sv->len, type.len);
-    return sv->ptr;
-  }
-  return reinterpret_cast<const char*>(slot);
-}
-
 }
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/runtime/types.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index 0657e5f..4961db2 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -76,7 +76,6 @@ struct ColumnType {
   int len;
   static const int MAX_VARCHAR_LENGTH = (1 << 16) - 1; // 65535
   static const int MAX_CHAR_LENGTH = (1 << 8) - 1; // 255
-  static const int MAX_CHAR_INLINE_LENGTH = (1 << 7); // 128
 
   /// Only set if type == TYPE_DECIMAL
   int precision, scale;
@@ -199,8 +198,7 @@ struct ColumnType {
   inline bool IsTimestampType() const { return type == TYPE_TIMESTAMP; }
 
   inline bool IsVarLenStringType() const {
-    return type == TYPE_STRING || type == TYPE_VARCHAR
-        || (type == TYPE_CHAR && len > MAX_CHAR_INLINE_LENGTH);
+    return type == TYPE_STRING || type == TYPE_VARCHAR;
   }
 
   inline bool IsComplexType() const {
@@ -224,7 +222,6 @@ struct ColumnType {
       case TYPE_VARCHAR:
         return 0;
       case TYPE_CHAR:
-        if (IsVarLenStringType()) return 0;
         return len;
       case TYPE_NULL:
       case TYPE_BOOLEAN:
@@ -258,7 +255,6 @@ struct ColumnType {
       case TYPE_VARCHAR:
         return 16;
       case TYPE_CHAR:
-        if (IsVarLenStringType()) return 16;
         return len;
       case TYPE_ARRAY:
       case TYPE_MAP:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/service/fe-support.cc
----------------------------------------------------------------------
diff --git a/be/src/service/fe-support.cc b/be/src/service/fe-support.cc
index 44976ec..0bc6285 100644
--- a/be/src/service/fe-support.cc
+++ b/be/src/service/fe-support.cc
@@ -132,7 +132,7 @@ static void SetTColumnValue(
       break;
     }
     case TYPE_CHAR:
-      tmp.assign(StringValue::CharSlotToPtr(value, type), type.len);
+      tmp.assign(reinterpret_cast<const char*>(value), type.len);
       col_val->binary_val.swap(tmp);
       col_val->__isset.binary_val = true;
       break;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/be/src/service/hs2-util.cc
----------------------------------------------------------------------
diff --git a/be/src/service/hs2-util.cc b/be/src/service/hs2-util.cc
index 7789b26..b856556 100644
--- a/be/src/service/hs2-util.cc
+++ b/be/src/service/hs2-util.cc
@@ -182,7 +182,7 @@ void impala::ExprValueToHS2TColumn(const void* value, const 
TColumnType& type,
       if (value != NULL) {
         ColumnType char_type = 
ColumnType::CreateCharType(type.types[0].scalar_type.len);
         column->stringVal.values.back().assign(
-            StringValue::CharSlotToPtr(value, char_type), char_type.len);
+            reinterpret_cast<const char*>(value), char_type.len);
       }
       nulls = &column->stringVal.nulls;
       break;
@@ -346,7 +346,7 @@ void impala::ExprValueToHS2TColumnValue(const void* value, 
const TColumnType& ty
       if (not_null) {
         ColumnType char_type = 
ColumnType::CreateCharType(type.types[0].scalar_type.len);
         hs2_col_val->stringVal.value.assign(
-           StringValue::CharSlotToPtr(value, char_type), char_type.len);
+           reinterpret_cast<const char*>(value), char_type.len);
       }
       break;
     case TPrimitiveType::TIMESTAMP:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java 
b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
index f43cf6c..ea51a3e 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
@@ -59,10 +59,6 @@ public class ScalarType extends Type {
   public static final int MAX_VARCHAR_LENGTH = (1 << 16) - 1; // 65535
   public static final int MAX_CHAR_LENGTH = (1 << 8) - 1; // 255
 
-  // Longest CHAR that we in line in the tuple.
-  // Keep consistent with backend ColumnType::CHAR_INLINE_LENGTH
-  public static final int CHAR_INLINE_LENGTH = (1 << 7); // 128
-
   // Hive, mysql, sql server standard.
   public static final int MAX_PRECISION = 38;
   public static final int MAX_SCALE = MAX_PRECISION;
@@ -311,7 +307,6 @@ public class ScalarType extends Type {
   public int getSlotSize() {
     switch (type_) {
       case CHAR:
-        if (len_ > CHAR_INLINE_LENGTH || len_ == 0) return 
STRING.getSlotSize();
         return len_;
       case DECIMAL: return TypesUtil.getDecimalSlotSize(this);
       default:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/7843b472/testdata/workloads/functional-query/queries/QueryTest/spilling.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/spilling.test 
b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
index a08de58..0f0e2ca 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
@@ -391,7 +391,7 @@ BIGINT
 row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
 ====
 ---- QUERY
-# Test sort with inlined char column materialized by exprs.
+# Test sort with small char column materialized by exprs.
 # Set low memory limit to force spilling.
 # IMPALA-3332: comparator makes local allocations that cause runaway memory 
consumption.
 set num_nodes=0;
@@ -431,7 +431,7 @@ row_regex: .*SpilledRuns: .* \([1-9][0-9]*\)
 row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
 ====
 ---- QUERY
-# Test sort with input inlined char column materialized before sort.
+# Test sort with small input char column materialized before sort.
 set num_nodes=0;
 set mem_limit=200m;
 set max_block_mgr_memory=4m;
@@ -470,7 +470,7 @@ row_regex: .*SpilledRuns: .* \([1-9][0-9]*\)
 row_regex: .*TotalMergesPerformed: .* \([1-9][0-9]*\)
 ====
 ---- QUERY
-# Test sort with input non-inlined char column materialized before sort.
+# Test sort with large input char column materialized before sort.
 # Set low memory limit to force spilling.
 set num_nodes=0;
 set mem_limit=200m;

Reply via email to