IMPALA-5573: Add decimal codegen in text scanner This patch adds decimal type codegen support in text scanner. Currently codegen would be disabled if there is a decimal column. With this patch StringParser::StringToDecimal will be called in generated code. A new file util/string-parser.cc is created and linked into libUtil. This file contains proxy functions to StringToDecimal in ordered to keep StringToDecimal out of LLVM IR.
In a benchmark query: > select l_quantity, l_extendedprice, l_discount, l_tax from biglineitem where > l_quantity > 100.0; where biglineitem is tpch.lineitem repeated 6 times, the codegen version is 19% faster than non-codgen version in scanning, and 8% faster in query time. Codegen time in this simple case is 69ms. Simple performance tests show that putting the parser in libUtil instead of impala-sse.bc would reduce codegen time by 2/3 in cases where only one decimal column is parsed while the scanning time is nearly the same. Change-Id: Ia65820e969d59094dc92d912a5279fa90f6b179d Reviewed-on: http://gerrit.cloudera.org:8080/7683 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/679ebc1a Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/679ebc1a Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/679ebc1a Branch: refs/heads/master Commit: 679ebc1ac66e07edc0e81b34c8ed56a0a8aff0ca Parents: 3b1a1df Author: Tianyi Wang <[email protected]> Authored: Thu Aug 17 17:35:31 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Wed Aug 23 06:24:12 2017 +0000 ---------------------------------------------------------------------- be/src/codegen/gen_ir_descriptions.py | 5 ++- be/src/exec/hdfs-scanner-ir.cc | 53 ++++++++++++++++++++++++------ be/src/exec/hdfs-scanner.cc | 8 ----- be/src/exec/text-converter.cc | 46 +++++++++++++++++++++----- be/src/util/CMakeLists.txt | 1 + be/src/util/string-parser.cc | 40 ++++++++++++++++++++++ be/src/util/string-parser.h | 11 ++++++- 7 files changed, 136 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/codegen/gen_ir_descriptions.py ---------------------------------------------------------------------- diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py index 3cc195c..668ae24 100755 --- a/be/src/codegen/gen_ir_descriptions.py +++ b/be/src/codegen/gen_ir_descriptions.py @@ -179,13 +179,16 @@ ir_functions = [ ["PARQUET_SCANNER_EVAL_RUNTIME_FILTER", "_ZN6impala18HdfsParquetScanner17EvalRuntimeFilterEiPNS_8TupleRowE"], ["STRING_TO_BOOL", "IrStringToBool"], - ["STRING_TO_INT8", "_Z14IrStringToInt8PKciPN6impala12StringParser11ParseResultE"], + ["STRING_TO_INT8", "IrStringToInt8"], ["STRING_TO_INT16", "IrStringToInt16"], ["STRING_TO_INT32", "IrStringToInt32"], ["STRING_TO_INT64", "IrStringToInt64"], ["STRING_TO_FLOAT", "IrStringToFloat"], ["STRING_TO_DOUBLE", "IrStringToDouble"], ["STRING_TO_TIMESTAMP", "IrStringToTimestamp"], + ["STRING_TO_DECIMAL4", "IrStringToDecimal4"], + ["STRING_TO_DECIMAL8", "IrStringToDecimal8"], + ["STRING_TO_DECIMAL16", "IrStringToDecimal16"], ["IS_NULL_STRING", "IrIsNullString"], ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"], ["RAW_VALUE_COMPARE", http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/exec/hdfs-scanner-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scanner-ir.cc b/be/src/exec/hdfs-scanner-ir.cc index 143adb7..867ce1e 100644 --- a/be/src/exec/hdfs-scanner-ir.cc +++ b/be/src/exec/hdfs-scanner-ir.cc @@ -80,46 +80,79 @@ ScalarExprEvaluator* HdfsScanner::GetConjunctEval(int idx) const { return (*conjunct_evals_)[idx]; } +void StringToDecimalSymbolDummy() { + // Force linker to to link the object file containing these functions. + StringToDecimal4(nullptr, 0, 0, 0, nullptr); + StringToDecimal8(nullptr, 0, 0, 0, nullptr); + StringToDecimal16(nullptr, 0, 0, 0, nullptr); +} + // Define the string parsing functions for llvm. Stamp out the templated functions #ifdef IR_COMPILE +using ParseResult = StringParser::ParseResult; extern "C" -bool IrStringToBool(const char* s, int len, StringParser::ParseResult* result) { +bool IrStringToBool(const char* s, int len, ParseResult* result) { return StringParser::StringToBool(s, len, result); } -int8_t IrStringToInt8(const char* s, int len, StringParser::ParseResult* result) { +extern "C" +int8_t IrStringToInt8(const char* s, int len, ParseResult* result) { return StringParser::StringToInt<int8_t>(s, len, result); } extern "C" -int16_t IrStringToInt16(const char* s, int len, StringParser::ParseResult* result) { +int16_t IrStringToInt16(const char* s, int len, ParseResult* result) { return StringParser::StringToInt<int16_t>(s, len, result); } extern "C" -int32_t IrStringToInt32(const char* s, int len, StringParser::ParseResult* result) { +int32_t IrStringToInt32(const char* s, int len, ParseResult* result) { return StringParser::StringToInt<int32_t>(s, len, result); } extern "C" -int64_t IrStringToInt64(const char* s, int len, StringParser::ParseResult* result) { +int64_t IrStringToInt64(const char* s, int len, ParseResult* result) { return StringParser::StringToInt<int64_t>(s, len, result); } extern "C" -float IrStringToFloat(const char* s, int len, StringParser::ParseResult* result) { +float IrStringToFloat(const char* s, int len, ParseResult* result) { return StringParser::StringToFloat<float>(s, len, result); } extern "C" -double IrStringToDouble(const char* s, int len, StringParser::ParseResult* result) { +double IrStringToDouble(const char* s, int len, ParseResult* result) { return StringParser::StringToFloat<double>(s, len, result); } extern "C" -TimestampValue IrStringToTimestamp(const char* s, int len, - StringParser::ParseResult* result) { - return StringParser::StringToTimestamp(s, len, result); +void IrStringToTimestamp(TimestampValue* out, const char* s, int len, + ParseResult* result) { + *out = StringParser::StringToTimestamp(s, len, result); +} + +extern "C" +Decimal4Value IrStringToDecimal4(const char* s, int len, int type_precision, + int type_scale, ParseResult* result) { + auto ret = StringToDecimal4(s, len, type_precision, type_scale, result); + if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE; + return ret; +} + +extern "C" +Decimal8Value IrStringToDecimal8(const char* s, int len, int type_precision, + int type_scale, ParseResult* result) { + auto ret = StringToDecimal8(s, len, type_precision, type_scale, result); + if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE; + return ret; +} + +extern "C" +Decimal16Value IrStringToDecimal16(const char* s, int len, int type_precision, + int type_scale, ParseResult* result) { + auto ret = StringToDecimal16(s, len, type_precision, type_scale, result); + if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE; + return ret; } extern "C" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/exec/hdfs-scanner.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc index 0ef62ab..e81a72a 100644 --- a/be/src/exec/hdfs-scanner.cc +++ b/be/src/exec/hdfs-scanner.cc @@ -308,14 +308,6 @@ Status HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node, SCOPED_TIMER(codegen->codegen_timer()); RuntimeState* state = node->runtime_state(); - // TODO: Timestamp is not yet supported - for (int i = 0; i < node->materialized_slots().size(); ++i) { - SlotDescriptor* slot_desc = node->materialized_slots()[i]; - if (slot_desc->type().type == TYPE_DECIMAL) { - return Status::Expected("Decimal not yet supported for codegen."); - } - } - // Cast away const-ness. The codegen only sets the cached typed llvm struct. TupleDescriptor* tuple_desc = const_cast<TupleDescriptor*>(node->tuple_desc()); vector<Function*> slot_fns; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/exec/text-converter.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc index 431a11b..0cad05c 100644 --- a/be/src/exec/text-converter.cc +++ b/be/src/exec/text-converter.cc @@ -229,6 +229,23 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, case TYPE_TIMESTAMP: parse_fn_enum = IRFunction::STRING_TO_TIMESTAMP; break; + case TYPE_DECIMAL: + switch (slot_desc->slot_size()) { + case 4: + parse_fn_enum = IRFunction::STRING_TO_DECIMAL4; + break; + case 8: + parse_fn_enum = IRFunction::STRING_TO_DECIMAL8; + break; + case 16: + parse_fn_enum = IRFunction::STRING_TO_DECIMAL16; + break; + default: + DCHECK(false); + return Status("TextConverter::CodegenWriteSlot(): " + "Decimal slots can't be this size."); + } + break; default: DCHECK(false); return Status("TextConverter::CodegenWriteSlot(): Codegen'd parser NYI for the" @@ -244,17 +261,22 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, LlvmCodeGen::NamedVariable parse_result("parse_result", codegen->GetType(TYPE_INT)); Value* parse_result_ptr = codegen->CreateEntryBlockAlloca(*fn, parse_result); - Value* parse_return; + CallInst* parse_return; // Call Impala's StringTo* function - if (parse_fn->arg_size() == 3) { - parse_return = builder.CreateCall(parse_fn, {args[1], args[2], parse_result_ptr}); - } else { - DCHECK(parse_fn->arg_size() == 4); + // Function implementations in exec/hdfs-scanner-ir.cc + if (slot_desc->type().type == TYPE_DECIMAL) { + // Special case for decimal since it has additional precision/scale parameters + parse_return = builder.CreateCall(parse_fn, {args[1], args[2], + codegen->GetIntConstant(TYPE_INT, slot_desc->type().precision), + codegen->GetIntConstant(TYPE_INT, slot_desc->type().scale), parse_result_ptr}); + } else if (slot_desc->type().type == TYPE_TIMESTAMP) { // If the return value is large (more than 16 bytes in our toolchain) the first // parameter would be a pointer to value parsed and the return value of callee // should be ignored - builder.CreateCall(parse_fn, {slot, args[1], args[2], parse_result_ptr}); - parse_return = nullptr; + parse_return = + builder.CreateCall(parse_fn, {slot, args[1], args[2], parse_result_ptr}); + } else { + parse_return = builder.CreateCall(parse_fn, {args[1], args[2], parse_result_ptr}); } Value* parse_result_val = builder.CreateLoad(parse_result_ptr, "parse_result"); Value* failed_value = codegen->GetIntConstant(TYPE_INT, StringParser::PARSE_FAILURE); @@ -274,7 +296,15 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, // Parse succeeded builder.SetInsertPoint(parse_success_block); // If the parsed value is in parse_return, move it into slot - if (parse_fn->arg_size() == 3) builder.CreateStore(parse_return, slot); + if (slot_desc->type().type == TYPE_DECIMAL) { + // For Decimal values, the return type generated by Clang is struct type rather than + // integer so casting is necessary + Value* cast_slot = builder.CreateBitCast(slot, + parse_return->getType()->getPointerTo()); + builder.CreateStore(parse_return, cast_slot); + } else if (slot_desc->type().type != TYPE_TIMESTAMP) { + builder.CreateStore(parse_return, slot); + } builder.CreateRet(codegen->true_value()); // Parse failed, set slot to null and return false http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/util/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index c38be86..3d92da7 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -70,6 +70,7 @@ add_library(Util redactor.cc runtime-profile.cc simple-logger.cc + string-parser.cc symbols-util.cc static-asserts.cc summary-util.cc http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/util/string-parser.cc ---------------------------------------------------------------------- diff --git a/be/src/util/string-parser.cc b/be/src/util/string-parser.cc new file mode 100644 index 0000000..f59c6b88 --- /dev/null +++ b/be/src/util/string-parser.cc @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/string-parser.h" + +namespace impala { + +using ParseResult = StringParser::ParseResult; +Decimal4Value StringToDecimal4(const char* s, int len, int type_precision, + int type_scale, StringParser::ParseResult* result) { + return StringParser::StringToDecimal<int32_t>(s, len, type_precision, + type_scale, result); +} + +Decimal8Value StringToDecimal8(const char* s, int len, int type_precision, + int type_scale, StringParser::ParseResult* result) { + return StringParser::StringToDecimal<int64_t>(s, len, type_precision, + type_scale, result); +} + +Decimal16Value StringToDecimal16(const char* s, int len, int type_precision, + int type_scale, StringParser::ParseResult* result) { + return StringParser::StringToDecimal<int128_t>(s, len, type_precision, + type_scale, result); +} +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/679ebc1a/be/src/util/string-parser.h ---------------------------------------------------------------------- diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h index d6b94c7..10c3fc7 100644 --- a/be/src/util/string-parser.h +++ b/be/src/util/string-parser.h @@ -598,6 +598,15 @@ inline int StringParser::StringParseTraits<int32_t>::max_ascii_len() { return 10 template<> inline int StringParser::StringParseTraits<int64_t>::max_ascii_len() { return 19; } -} +// These functions are too large to benefit from inlining. +Decimal4Value StringToDecimal4(const char* s, int len, int type_precision, + int type_scale, StringParser::ParseResult* result); + +Decimal8Value StringToDecimal8(const char* s, int len, int type_precision, + int type_scale, StringParser::ParseResult* result); +Decimal16Value StringToDecimal16(const char* s, int len, int type_precision, + int type_scale, StringParser::ParseResult* result); + +} #endif
