IMPALA-3206: Enable codegen for AVRO_DECIMAL This change adds the missing switch statement in CodegenReadScalar() for AVRO_DECIMAL so that we will also codegen if an avro table contains AVRO_DECIMAL. With this change, the following query improves by 37.5%, going from 8s to 5s:
select count(distinct l_linenumber), avg(l_extendedprice), max(l_discount), min(l_tax) from tpch15_avro.lineitem; This change also un-inlines BitUtil::ByteSwap() as the third argument 'len' is not compilation constant for all call sites. Change-Id: I51adf0c1ba76e055f31ccb0034a0d23ea2afb30e Reviewed-on: http://gerrit.cloudera.org:8080/3489 Reviewed-by: Michael Ho <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/df2fc08d Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/df2fc08d Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/df2fc08d Branch: refs/heads/master Commit: df2fc08d22a3be8c9b6a2f2e6a4d9d59c2b6ca44 Parents: 948a6c3 Author: Michael Ho <[email protected]> Authored: Wed Jun 8 15:18:30 2016 -0700 Committer: Taras Bobrovytsky <[email protected]> Committed: Thu Jul 14 19:04:44 2016 +0000 ---------------------------------------------------------------------- be/src/codegen/gen_ir_descriptions.py | 1 + be/src/exec/hdfs-avro-scanner.cc | 4 +- be/src/util/CMakeLists.txt | 1 + be/src/util/bit-util-test.cc | 2 +- be/src/util/bit-util.cc | 124 ++++++++++++++++++ be/src/util/bit-util.h | 2 +- be/src/util/bit-util.inline.h | 130 ------------------- be/src/util/decimal-util.h | 2 +- .../queries/QueryTest/decimal_avro.test | 15 +++ 9 files changed, 147 insertions(+), 134 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/codegen/gen_ir_descriptions.py ---------------------------------------------------------------------- diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py index 5cc992c..280ab96 100755 --- a/be/src/codegen/gen_ir_descriptions.py +++ b/be/src/codegen/gen_ir_descriptions.py @@ -106,6 +106,7 @@ ir_functions = [ ["READ_AVRO_STRING", "ReadAvroString"], ["READ_AVRO_VARCHAR", "ReadAvroVarchar"], ["READ_AVRO_CHAR", "ReadAvroChar"], + ["READ_AVRO_DECIMAL", "ReadAvroDecimal"], ["HDFS_SCANNER_WRITE_ALIGNED_TUPLES", "WriteAlignedTuples"], ["HDFS_SCANNER_GET_CONJUNCT_CTX", "GetConjunctCtx"], ["STRING_TO_BOOL", "IrStringToBool"], http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/exec/hdfs-avro-scanner.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-avro-scanner.cc b/be/src/exec/hdfs-avro-scanner.cc index fdb0a96..09d3955 100644 --- a/be/src/exec/hdfs-avro-scanner.cc +++ b/be/src/exec/hdfs-avro-scanner.cc @@ -957,7 +957,9 @@ Status HdfsAvroScanner::CodegenReadScalar(const AvroSchemaElement& element, read_field_fn = codegen->GetFunction(IRFunction::READ_AVRO_STRING, false); } break; - // TODO: Add AVRO_DECIMAL here. + case AVRO_DECIMAL: + read_field_fn = codegen->GetFunction(IRFunction::READ_AVRO_DECIMAL, false); + break; default: return Status(Substitute( "Failed to codegen MaterializeTuple() due to unsupported type: $0", http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/util/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 57bf572..2bf5e49 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -31,6 +31,7 @@ add_library(Util avro-util.cc benchmark.cc bitmap.cc + bit-util.cc bloom-filter.cc cgroups-mgr.cc codec.cc http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/util/bit-util-test.cc ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc index e2bdd97..9947fca 100644 --- a/be/src/util/bit-util-test.cc +++ b/be/src/util/bit-util-test.cc @@ -21,7 +21,7 @@ #include <gtest/gtest.h> #include "gutil/bits.h" -#include "util/bit-util.inline.h" +#include "util/bit-util.h" #include "util/cpu-info.h" #include "common/names.h" http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/util/bit-util.cc ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util.cc b/be/src/util/bit-util.cc new file mode 100644 index 0000000..6853764 --- /dev/null +++ b/be/src/util/bit-util.cc @@ -0,0 +1,124 @@ +// Copyright 2016 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "util/bit-util.h" + +namespace impala { + +void BitUtil::ByteSwap(void* dest, const void* source, int len) { + uint8_t* dst = reinterpret_cast<uint8_t*>(dest); + const uint8_t* src = reinterpret_cast<const uint8_t*>(source); + switch (len) { + case 1: + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src); + return; + case 2: + *reinterpret_cast<uint16_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src)); + return; + case 3: + *reinterpret_cast<uint16_t*>(dst + 1) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src)); + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 2); + return; + case 4: + *reinterpret_cast<uint32_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src)); + return; + case 5: + *reinterpret_cast<uint32_t*>(dst + 1) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src)); + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 4); + return; + case 6: + *reinterpret_cast<uint32_t*>(dst + 2) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src)); + *reinterpret_cast<uint16_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src + 4)); + return; + case 7: + *reinterpret_cast<uint32_t*>(dst + 3) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src)); + *reinterpret_cast<uint16_t*>(dst + 1) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src + 4)); + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 6); + return; + case 8: + *reinterpret_cast<uint64_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + return; + case 9: + *reinterpret_cast<uint64_t*>(dst + 1) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 8); + return; + case 10: + *reinterpret_cast<uint64_t*>(dst + 2) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint16_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src + 8)); + return; + case 11: + *reinterpret_cast<uint64_t*>(dst + 3) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint16_t*>(dst + 1) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src + 8)); + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 10); + return; + case 12: + *reinterpret_cast<uint64_t*>(dst + 4) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint32_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); + return; + case 13: + *reinterpret_cast<uint64_t*>(dst + 5) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint32_t*>(dst + 1) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 12); + return; + case 14: + *reinterpret_cast<uint64_t*>(dst + 6) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint32_t*>(dst + 2) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); + *reinterpret_cast<uint16_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src + 12)); + return; + case 15: + *reinterpret_cast<uint64_t*>(dst + 7) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint32_t*>(dst + 3) = + ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); + *reinterpret_cast<uint16_t*>(dst + 1) = + ByteSwap(*reinterpret_cast<const uint16_t*>(src + 12)); + *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 14); + return; + case 16: + *reinterpret_cast<uint64_t*>(dst + 8) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src)); + *reinterpret_cast<uint64_t*>(dst) = + ByteSwap(*reinterpret_cast<const uint64_t*>(src + 8)); + return; + default: + // Revert to slow loop-based swap. + for (int i = 0; i < len; ++i) { + dst[i] = src[len - i - 1]; + } + return; + } +} + +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/util/bit-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h index 14553ae..a38fcba 100644 --- a/be/src/util/bit-util.h +++ b/be/src/util/bit-util.h @@ -167,7 +167,7 @@ class BitUtil { /// Write the swapped bytes into dest; source and dest must not overlap. /// This function is optimized for len <= 16. It reverts to a slow loop-based /// swap for len > 16. - static inline void ByteSwap(void* dest, const void* source, int len); + static void ByteSwap(void* dest, const void* source, int len); /// Converts to big endian format (if not already in big endian) from the /// machine's native endian format. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/util/bit-util.inline.h ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util.inline.h b/be/src/util/bit-util.inline.h deleted file mode 100644 index 33b44c6..0000000 --- a/be/src/util/bit-util.inline.h +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2016 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef IMPALA_BIT_UTIL_INLINE_H -#define IMPALA_BIT_UTIL_INLINE_H - -#include "util/bit-util.h" - -namespace impala { - -inline void BitUtil::ByteSwap(void* dest, const void* source, int len) { - uint8_t* dst = reinterpret_cast<uint8_t*>(dest); - const uint8_t* src = reinterpret_cast<const uint8_t*>(source); - switch (len) { - case 1: - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src); - return; - case 2: - *reinterpret_cast<uint16_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src)); - return; - case 3: - *reinterpret_cast<uint16_t*>(dst + 1) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src)); - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 2); - return; - case 4: - *reinterpret_cast<uint32_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src)); - return; - case 5: - *reinterpret_cast<uint32_t*>(dst + 1) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src)); - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 4); - return; - case 6: - *reinterpret_cast<uint32_t*>(dst + 2) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src)); - *reinterpret_cast<uint16_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src + 4)); - return; - case 7: - *reinterpret_cast<uint32_t*>(dst + 3) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src)); - *reinterpret_cast<uint16_t*>(dst + 1) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src + 4)); - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 6); - return; - case 8: - *reinterpret_cast<uint64_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - return; - case 9: - *reinterpret_cast<uint64_t*>(dst + 1) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 8); - return; - case 10: - *reinterpret_cast<uint64_t*>(dst + 2) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint16_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src + 8)); - return; - case 11: - *reinterpret_cast<uint64_t*>(dst + 3) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint16_t*>(dst + 1) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src + 8)); - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 10); - return; - case 12: - *reinterpret_cast<uint64_t*>(dst + 4) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint32_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); - return; - case 13: - *reinterpret_cast<uint64_t*>(dst + 5) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint32_t*>(dst + 1) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 12); - return; - case 14: - *reinterpret_cast<uint64_t*>(dst + 6) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint32_t*>(dst + 2) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); - *reinterpret_cast<uint16_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src + 12)); - return; - case 15: - *reinterpret_cast<uint64_t*>(dst + 7) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint32_t*>(dst + 3) = - ByteSwap(*reinterpret_cast<const uint32_t*>(src + 8)); - *reinterpret_cast<uint16_t*>(dst + 1) = - ByteSwap(*reinterpret_cast<const uint16_t*>(src + 12)); - *reinterpret_cast<uint8_t*>(dst) = *reinterpret_cast<const uint8_t*>(src + 14); - return; - case 16: - *reinterpret_cast<uint64_t*>(dst + 8) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src)); - *reinterpret_cast<uint64_t*>(dst) = - ByteSwap(*reinterpret_cast<const uint64_t*>(src + 8)); - return; - default: - // Revert to slow loop-based swap. - for (int i = 0; i < len; ++i) { - dst[i] = src[len - i - 1]; - } - return; - } -} - -} - -#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/be/src/util/decimal-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/decimal-util.h b/be/src/util/decimal-util.h index 2ce69bc..79b7130 100644 --- a/be/src/util/decimal-util.h +++ b/be/src/util/decimal-util.h @@ -22,7 +22,7 @@ #include "runtime/multi-precision.h" #include "runtime/types.h" -#include "util/bit-util.inline.h" +#include "util/bit-util.h" namespace impala { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df2fc08d/testdata/workloads/functional-query/queries/QueryTest/decimal_avro.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/decimal_avro.test b/testdata/workloads/functional-query/queries/QueryTest/decimal_avro.test index 826dd2f..73fd27d 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/decimal_avro.test +++ b/testdata/workloads/functional-query/queries/QueryTest/decimal_avro.test @@ -38,3 +38,18 @@ select count(*) from avro_decimal_tbl ---- TYPES BIGINT ==== +---- QUERY +select l_shipmode, count(distinct l_quantity), avg(l_extendedprice), max(l_discount), ndv(l_tax) +from tpch_avro_snap.lineitem +group by l_shipmode +---- RESULTS +'SHIP',50,38267.37,0.10,9 +'REG AIR',50,38268.41,0.10,9 +'TRUCK',50,38209.82,0.10,9 +'RAIL',50,38269.81,0.10,9 +'FOB',50,38246.23,0.10,9 +'MAIL',50,38224.29,0.10,9 +'AIR',50,38299.98,0.10,9 +----TYPES +STRING,DECIMAL,DECIMAL,DECIMAL,DECIMAL +====
