IMPALA-5572: Timestamp codegen for text scanner Currently codegen is disabled when scanning text tables with timestamp columns. The message is "Timestamp not yet supported for codegen." This patch adds support for timestamp codegen. A simple query in the comment section of this issue performs a little better (4%) than interpreted version.
Testing: The patch passed test with exhaustive workload exploration strategy. Change-Id: I00cbf8ec7784ca9594e14e952f46dc54a5ede44b Reviewed-on: http://gerrit.cloudera.org:8080/7556 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d61065d6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d61065d6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d61065d6 Branch: refs/heads/master Commit: d61065d6383ece19fdafd2526ecf00dab4e1f4d4 Parents: 0c46147 Author: Tianyi Wang <[email protected]> Authored: Tue Aug 1 14:52:55 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue Aug 8 10:23:29 2017 +0000 ---------------------------------------------------------------------- be/src/codegen/gen_ir_descriptions.py | 1 + be/src/exec/hdfs-scanner-ir.cc | 6 ++++++ be/src/exec/hdfs-scanner.cc | 3 --- be/src/exec/text-converter.cc | 19 ++++++++++++++++--- be/src/util/string-parser.h | 11 +++++++++++ 5 files changed, 34 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/codegen/gen_ir_descriptions.py ---------------------------------------------------------------------- diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py index be4be80..3cc195c 100755 --- a/be/src/codegen/gen_ir_descriptions.py +++ b/be/src/codegen/gen_ir_descriptions.py @@ -185,6 +185,7 @@ ir_functions = [ ["STRING_TO_INT64", "IrStringToInt64"], ["STRING_TO_FLOAT", "IrStringToFloat"], ["STRING_TO_DOUBLE", "IrStringToDouble"], + ["STRING_TO_TIMESTAMP", "IrStringToTimestamp"], ["IS_NULL_STRING", "IrIsNullString"], ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"], ["RAW_VALUE_COMPARE", http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/exec/hdfs-scanner-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scanner-ir.cc b/be/src/exec/hdfs-scanner-ir.cc index 96aad07..143adb7 100644 --- a/be/src/exec/hdfs-scanner-ir.cc +++ b/be/src/exec/hdfs-scanner-ir.cc @@ -117,6 +117,12 @@ double IrStringToDouble(const char* s, int len, StringParser::ParseResult* resul } extern "C" +TimestampValue IrStringToTimestamp(const char* s, int len, + StringParser::ParseResult* result) { + return StringParser::StringToTimestamp(s, len, result); +} + +extern "C" bool IrIsNullString(const char* data, int len) { return data == NULL || (len == 2 && data[0] == '\\' && data[1] == 'N'); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/exec/hdfs-scanner.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc index 10e4edd..23fcc20 100644 --- a/be/src/exec/hdfs-scanner.cc +++ b/be/src/exec/hdfs-scanner.cc @@ -311,9 +311,6 @@ Status HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node, // TODO: Timestamp is not yet supported for (int i = 0; i < node->materialized_slots().size(); ++i) { SlotDescriptor* slot_desc = node->materialized_slots()[i]; - if (slot_desc->type().type == TYPE_TIMESTAMP) { - return Status::Expected("Timestamp not yet supported for codegen."); - } if (slot_desc->type().type == TYPE_DECIMAL) { return Status::Expected("Decimal not yet supported for codegen."); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/exec/text-converter.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc index e12be85..db390c4 100644 --- a/be/src/exec/text-converter.cc +++ b/be/src/exec/text-converter.cc @@ -220,6 +220,9 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, case TYPE_DOUBLE: parse_fn_enum = IRFunction::STRING_TO_DOUBLE; break; + case TYPE_TIMESTAMP: + parse_fn_enum = IRFunction::STRING_TO_TIMESTAMP; + break; default: DCHECK(false); return NULL; @@ -234,9 +237,18 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, LlvmCodeGen::NamedVariable parse_result("parse_result", codegen->GetType(TYPE_INT)); Value* parse_result_ptr = codegen->CreateEntryBlockAlloca(fn, parse_result); + Value* parse_return; // Call Impala's StringTo* function - Value* result = builder.CreateCall(parse_fn, - ArrayRef<Value*>({args[1], args[2], parse_result_ptr})); + if (parse_fn->arg_size() == 3) { + parse_return = builder.CreateCall(parse_fn, {args[1], args[2], parse_result_ptr}); + } else { + DCHECK(parse_fn->arg_size() == 4); + // If the return value is large (more than 16 bytes in our toolchain) the first + // parameter would be a pointer to value parsed and the return value of callee + // should be ignored + builder.CreateCall(parse_fn, {slot, args[1], args[2], parse_result_ptr}); + parse_return = nullptr; + } Value* parse_result_val = builder.CreateLoad(parse_result_ptr, "parse_result"); Value* failed_value = codegen->GetIntConstant(TYPE_INT, StringParser::PARSE_FAILURE); @@ -254,7 +266,8 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, // Parse succeeded builder.SetInsertPoint(parse_success_block); - builder.CreateStore(result, slot); + // If the parsed value is in parse_return, move it into slot + if (parse_fn->arg_size() == 3) builder.CreateStore(parse_return, slot); builder.CreateRet(codegen->true_value()); // Parse failed, set slot to null and return false http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d61065d6/be/src/util/string-parser.h ---------------------------------------------------------------------- diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h index cc9c517..d6b94c7 100644 --- a/be/src/util/string-parser.h +++ b/be/src/util/string-parser.h @@ -25,6 +25,8 @@ #include "common/logging.h" #include "runtime/decimal-value.h" +#include "runtime/timestamp-parse-util.h" +#include "runtime/timestamp-value.h" #include "util/decimal-util.h" namespace impala { @@ -95,6 +97,15 @@ class StringParser { return StringToBoolInternal(s + i, len - i, result); } + /// Parse a TimestampValue from s. + static inline TimestampValue StringToTimestamp(const char* s, int len, + ParseResult* result) { + boost::gregorian::date d; + boost::posix_time::time_duration t; + *result = TimestampParser::Parse(s, len, &d, &t) ? PARSE_SUCCESS : PARSE_FAILURE; + return {d, t}; + } + /// Parses a decimal from s, returning the result. /// The parse status is returned in *result. /// On overflow or invalid values, the return value is undefined.
