IMPALA-5573: Add decimal codegen in text scanner

This patch adds decimal type codegen support in text scanner. Currently
codegen would be disabled if there is a decimal column. With this patch
StringParser::StringToDecimal will be called in generated code. A new
file util/string-parser.cc is created and linked into libUtil. This file
contains proxy functions to StringToDecimal in ordered to keep
StringToDecimal out of LLVM IR.

In a benchmark query:
> select l_quantity, l_extendedprice, l_discount, l_tax from biglineitem where 
> l_quantity > 100.0;
where biglineitem is tpch.lineitem repeated 6 times, the codegen version
is 19% faster than non-codgen version in scanning, and 8% faster in
query time. Codegen time in this simple case is 69ms.

Simple performance tests show that putting the parser in libUtil instead
of impala-sse.bc would reduce codegen time by 2/3 in cases where only
one decimal column is parsed while the scanning time is nearly the same.

Change-Id: Ia65820e969d59094dc92d912a5279fa90f6b179d
Reviewed-on: http://gerrit.cloudera.org:8080/7683
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/679ebc1a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/679ebc1a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/679ebc1a

Branch: refs/heads/master
Commit: 679ebc1ac66e07edc0e81b34c8ed56a0a8aff0ca
Parents: 3b1a1df
Author: Tianyi Wang <[email protected]>
Authored: Thu Aug 17 17:35:31 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Aug 23 06:24:12 2017 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py |  5 ++-
 be/src/exec/hdfs-scanner-ir.cc        | 53 ++++++++++++++++++++++++------
 be/src/exec/hdfs-scanner.cc           |  8 -----
 be/src/exec/text-converter.cc         | 46 +++++++++++++++++++++-----
 be/src/util/CMakeLists.txt            |  1 +
 be/src/util/string-parser.cc          | 40 ++++++++++++++++++++++
 be/src/util/string-parser.h           | 11 ++++++-
 7 files changed, 136 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py 
b/be/src/codegen/gen_ir_descriptions.py
index 3cc195c..668ae24 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -179,13 +179,16 @@ ir_functions = [
   ["PARQUET_SCANNER_EVAL_RUNTIME_FILTER",
    "_ZN6impala18HdfsParquetScanner17EvalRuntimeFilterEiPNS_8TupleRowE"],
   ["STRING_TO_BOOL", "IrStringToBool"],
-  ["STRING_TO_INT8", 
"_Z14IrStringToInt8PKciPN6impala12StringParser11ParseResultE"],
+  ["STRING_TO_INT8", "IrStringToInt8"],
   ["STRING_TO_INT16", "IrStringToInt16"],
   ["STRING_TO_INT32", "IrStringToInt32"],
   ["STRING_TO_INT64", "IrStringToInt64"],
   ["STRING_TO_FLOAT", "IrStringToFloat"],
   ["STRING_TO_DOUBLE", "IrStringToDouble"],
   ["STRING_TO_TIMESTAMP", "IrStringToTimestamp"],
+  ["STRING_TO_DECIMAL4", "IrStringToDecimal4"],
+  ["STRING_TO_DECIMAL8", "IrStringToDecimal8"],
+  ["STRING_TO_DECIMAL16", "IrStringToDecimal16"],
   ["IS_NULL_STRING", "IrIsNullString"],
   ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"],
   ["RAW_VALUE_COMPARE",

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/exec/hdfs-scanner-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner-ir.cc b/be/src/exec/hdfs-scanner-ir.cc
index 143adb7..867ce1e 100644
--- a/be/src/exec/hdfs-scanner-ir.cc
+++ b/be/src/exec/hdfs-scanner-ir.cc
@@ -80,46 +80,79 @@ ScalarExprEvaluator* HdfsScanner::GetConjunctEval(int idx) 
const {
   return (*conjunct_evals_)[idx];
 }
 
+void StringToDecimalSymbolDummy() {
+  // Force linker to to link the object file containing these functions.
+  StringToDecimal4(nullptr, 0, 0, 0, nullptr);
+  StringToDecimal8(nullptr, 0, 0, 0, nullptr);
+  StringToDecimal16(nullptr, 0, 0, 0, nullptr);
+}
+
 // Define the string parsing functions for llvm.  Stamp out the templated 
functions
 #ifdef IR_COMPILE
+using ParseResult = StringParser::ParseResult;
 extern "C"
-bool IrStringToBool(const char* s, int len, StringParser::ParseResult* result) 
{
+bool IrStringToBool(const char* s, int len, ParseResult* result) {
   return StringParser::StringToBool(s, len, result);
 }
 
-int8_t IrStringToInt8(const char* s, int len, StringParser::ParseResult* 
result) {
+extern "C"
+int8_t IrStringToInt8(const char* s, int len, ParseResult* result) {
   return StringParser::StringToInt<int8_t>(s, len, result);
 }
 
 extern "C"
-int16_t IrStringToInt16(const char* s, int len, StringParser::ParseResult* 
result) {
+int16_t IrStringToInt16(const char* s, int len, ParseResult* result) {
   return StringParser::StringToInt<int16_t>(s, len, result);
 }
 
 extern "C"
-int32_t IrStringToInt32(const char* s, int len, StringParser::ParseResult* 
result) {
+int32_t IrStringToInt32(const char* s, int len, ParseResult* result) {
   return StringParser::StringToInt<int32_t>(s, len, result);
 }
 
 extern "C"
-int64_t IrStringToInt64(const char* s, int len, StringParser::ParseResult* 
result) {
+int64_t IrStringToInt64(const char* s, int len, ParseResult* result) {
   return StringParser::StringToInt<int64_t>(s, len, result);
 }
 
 extern "C"
-float IrStringToFloat(const char* s, int len, StringParser::ParseResult* 
result) {
+float IrStringToFloat(const char* s, int len, ParseResult* result) {
   return StringParser::StringToFloat<float>(s, len, result);
 }
 
 extern "C"
-double IrStringToDouble(const char* s, int len, StringParser::ParseResult* 
result) {
+double IrStringToDouble(const char* s, int len, ParseResult* result) {
   return StringParser::StringToFloat<double>(s, len, result);
 }
 
 extern "C"
-TimestampValue IrStringToTimestamp(const char* s, int len,
-    StringParser::ParseResult* result) {
-  return StringParser::StringToTimestamp(s, len, result);
+void IrStringToTimestamp(TimestampValue* out, const char* s, int len,
+    ParseResult* result) {
+  *out = StringParser::StringToTimestamp(s, len, result);
+}
+
+extern "C"
+Decimal4Value IrStringToDecimal4(const char* s, int len, int type_precision,
+    int type_scale, ParseResult* result)  {
+  auto ret = StringToDecimal4(s, len, type_precision, type_scale, result);
+  if (*result != ParseResult::PARSE_SUCCESS) *result = 
ParseResult::PARSE_FAILURE;
+  return ret;
+}
+
+extern "C"
+Decimal8Value IrStringToDecimal8(const char* s, int len, int type_precision,
+    int type_scale, ParseResult* result)  {
+  auto ret = StringToDecimal8(s, len, type_precision, type_scale, result);
+  if (*result != ParseResult::PARSE_SUCCESS) *result = 
ParseResult::PARSE_FAILURE;
+  return ret;
+}
+
+extern "C"
+Decimal16Value IrStringToDecimal16(const char* s, int len, int type_precision,
+    int type_scale, ParseResult* result)  {
+  auto ret = StringToDecimal16(s, len, type_precision, type_scale, result);
+  if (*result != ParseResult::PARSE_SUCCESS) *result = 
ParseResult::PARSE_FAILURE;
+  return ret;
 }
 
 extern "C"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index 0ef62ab..e81a72a 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -308,14 +308,6 @@ Status 
HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node,
   SCOPED_TIMER(codegen->codegen_timer());
   RuntimeState* state = node->runtime_state();
 
-  // TODO: Timestamp is not yet supported
-  for (int i = 0; i < node->materialized_slots().size(); ++i) {
-    SlotDescriptor* slot_desc = node->materialized_slots()[i];
-    if (slot_desc->type().type == TYPE_DECIMAL) {
-      return Status::Expected("Decimal not yet supported for codegen.");
-    }
-  }
-
   // Cast away const-ness.  The codegen only sets the cached typed llvm struct.
   TupleDescriptor* tuple_desc = 
const_cast<TupleDescriptor*>(node->tuple_desc());
   vector<Function*> slot_fns;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/exec/text-converter.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index 431a11b..0cad05c 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -229,6 +229,23 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* 
codegen,
       case TYPE_TIMESTAMP:
         parse_fn_enum = IRFunction::STRING_TO_TIMESTAMP;
         break;
+      case TYPE_DECIMAL:
+        switch (slot_desc->slot_size()) {
+          case 4:
+            parse_fn_enum = IRFunction::STRING_TO_DECIMAL4;
+            break;
+          case 8:
+            parse_fn_enum = IRFunction::STRING_TO_DECIMAL8;
+            break;
+          case 16:
+            parse_fn_enum = IRFunction::STRING_TO_DECIMAL16;
+            break;
+          default:
+            DCHECK(false);
+            return Status("TextConverter::CodegenWriteSlot(): "
+                "Decimal slots can't be this size.");
+        }
+        break;
       default:
         DCHECK(false);
         return Status("TextConverter::CodegenWriteSlot(): Codegen'd parser NYI 
for the"
@@ -244,17 +261,22 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* 
codegen,
     LlvmCodeGen::NamedVariable parse_result("parse_result", 
codegen->GetType(TYPE_INT));
     Value* parse_result_ptr = codegen->CreateEntryBlockAlloca(*fn, 
parse_result);
 
-    Value* parse_return;
+    CallInst* parse_return;
     // Call Impala's StringTo* function
-    if (parse_fn->arg_size() == 3) {
-      parse_return = builder.CreateCall(parse_fn, {args[1], args[2], 
parse_result_ptr});
-    } else {
-      DCHECK(parse_fn->arg_size() == 4);
+    // Function implementations in exec/hdfs-scanner-ir.cc
+    if (slot_desc->type().type == TYPE_DECIMAL) {
+      // Special case for decimal since it has additional precision/scale 
parameters
+      parse_return = builder.CreateCall(parse_fn, {args[1], args[2],
+          codegen->GetIntConstant(TYPE_INT, slot_desc->type().precision),
+          codegen->GetIntConstant(TYPE_INT, slot_desc->type().scale), 
parse_result_ptr});
+    } else if (slot_desc->type().type == TYPE_TIMESTAMP) {
       // If the return value is large (more than 16 bytes in our toolchain) 
the first
       // parameter would be a pointer to value parsed and the return value of 
callee
       // should be ignored
-      builder.CreateCall(parse_fn, {slot, args[1], args[2], parse_result_ptr});
-      parse_return = nullptr;
+      parse_return =
+          builder.CreateCall(parse_fn, {slot, args[1], args[2], 
parse_result_ptr});
+    } else {
+      parse_return = builder.CreateCall(parse_fn, {args[1], args[2], 
parse_result_ptr});
     }
     Value* parse_result_val = builder.CreateLoad(parse_result_ptr, 
"parse_result");
     Value* failed_value = codegen->GetIntConstant(TYPE_INT, 
StringParser::PARSE_FAILURE);
@@ -274,7 +296,15 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* 
codegen,
     // Parse succeeded
     builder.SetInsertPoint(parse_success_block);
     // If the parsed value is in parse_return, move it into slot
-    if (parse_fn->arg_size() == 3) builder.CreateStore(parse_return, slot);
+    if (slot_desc->type().type == TYPE_DECIMAL) {
+      // For Decimal values, the return type generated by Clang is struct type 
rather than
+      // integer so casting is necessary
+      Value* cast_slot = builder.CreateBitCast(slot,
+          parse_return->getType()->getPointerTo());
+      builder.CreateStore(parse_return, cast_slot);
+    } else if (slot_desc->type().type != TYPE_TIMESTAMP) {
+      builder.CreateStore(parse_return, slot);
+    }
     builder.CreateRet(codegen->true_value());
 
     // Parse failed, set slot to null and return false

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index c38be86..3d92da7 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -70,6 +70,7 @@ add_library(Util
   redactor.cc
   runtime-profile.cc
   simple-logger.cc
+  string-parser.cc
   symbols-util.cc
   static-asserts.cc
   summary-util.cc

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/util/string-parser.cc
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.cc b/be/src/util/string-parser.cc
new file mode 100644
index 0000000..f59c6b88
--- /dev/null
+++ b/be/src/util/string-parser.cc
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/string-parser.h"
+
+namespace impala {
+
+using ParseResult = StringParser::ParseResult;
+Decimal4Value StringToDecimal4(const char* s, int len, int type_precision,
+    int type_scale, StringParser::ParseResult* result) {
+  return StringParser::StringToDecimal<int32_t>(s, len, type_precision,
+      type_scale, result);
+}
+
+Decimal8Value StringToDecimal8(const char* s, int len, int type_precision,
+    int type_scale, StringParser::ParseResult* result) {
+  return StringParser::StringToDecimal<int64_t>(s, len, type_precision,
+      type_scale, result);
+}
+
+Decimal16Value StringToDecimal16(const char* s, int len, int type_precision,
+    int type_scale, StringParser::ParseResult* result) {
+  return StringParser::StringToDecimal<int128_t>(s, len, type_precision,
+      type_scale, result);
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/util/string-parser.h
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h
index d6b94c7..10c3fc7 100644
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -598,6 +598,15 @@ inline int 
StringParser::StringParseTraits<int32_t>::max_ascii_len() { return 10
 template<>
 inline int StringParser::StringParseTraits<int64_t>::max_ascii_len() { return 
19; }
 
-}
+// These functions are too large to benefit from inlining.
+Decimal4Value StringToDecimal4(const char* s, int len, int type_precision,
+    int type_scale, StringParser::ParseResult* result);
+
+Decimal8Value StringToDecimal8(const char* s, int len, int type_precision,
+    int type_scale, StringParser::ParseResult* result);
 
+Decimal16Value StringToDecimal16(const char* s, int len, int type_precision,
+    int type_scale, StringParser::ParseResult* result);
+
+}
 #endif

Reply via email to