Repository: incubator-impala
Updated Branches:
  refs/heads/master c7fe4385d -> db5de41a8


IMPALA-4291: Reduce LLVM module's preparation time

Previously, when creating a LlvmCodeGen object, we
ran an O(mn) algorithm to map each IRFunction::Type
to the actual llvm::Function object in the module,
where m is the size of the IRFunction::Type enum and
n is the total number of functions in the module.
This is a waste of time if we only use a few functions
from the module.

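This change instead looks up each function by its exact
mangled name, lazily, the first time it is requested.
It reduces the preparation time of the following simple
query from 23ms to 10ms: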

select count(*) from tpch100_parquet.lineitem where l_orderkey > 20;

Change-Id: I61ab9fa8cca5a0909bb716c3c62819da3e3b3041
Reviewed-on: http://gerrit.cloudera.org:8080/4691
Reviewed-by: Michael Ho <k...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/47b8aa3a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/47b8aa3a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/47b8aa3a

Branch: refs/heads/master
Commit: 47b8aa3a9e7682ebb182696901916900d3323039
Parents: c7fe438
Author: Michael Ho <k...@cloudera.com>
Authored: Sat Oct 8 22:43:35 2016 -0700
Committer: Internal Jenkins <cloudera-hud...@gerrit.cloudera.org>
Committed: Fri Oct 14 03:02:35 2016 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py | 224 +++++++++++++++++++----------
 be/src/codegen/llvm-codegen.cc        |  74 ++++------
 2 files changed, 175 insertions(+), 123 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/47b8aa3a/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py 
b/be/src/codegen/gen_ir_descriptions.py
index 60244a7..2b08509 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -38,90 +38,156 @@ options, args = parser.parse_args()
 #   - be/src/generated-sources/impala-ir/impala-ir-function-names.h
 #     This file contains a mapping of <string, enum>
 
-# Mapping of enum to compiled function name.  The compiled function name only 
has to
-# be a substring of the actual, mangled compiler generated name.
+# Mapping of enum to compiled function name. The compiled function name has to 
be
+# the actual mangled compiler generated name. One can easily obtain that by 
disassembling
+# the bit code module.
 # TODO: should we work out the mangling rules?
 ir_functions = [
-  ["AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING", "ProcessRowBatchWithGrouping"],
-  ["AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING", "ProcessRowBatchNoGrouping"],
-  ["AGG_NODE_GET_EXPR_CTX", "GetAggExprCtx"],
-  ["AGG_NODE_GET_FN_CTX", "GetAggFnCtx"],
+  ["AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING",
+   "_ZN6impala15AggregationNode27ProcessRowBatchWithGroupingEPNS_8RowBatchE"],
+  ["AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING",
+   "_ZN6impala15AggregationNode25ProcessRowBatchNoGroupingEPNS_8RowBatchE"],
+  ["AGG_NODE_GET_EXPR_CTX",
+   "_ZNK6impala15AggregationNode13GetAggExprCtxEi"],
+  ["AGG_NODE_GET_FN_CTX",
+   "_ZNK6impala15AggregationNode11GetAggFnCtxEi"],
   ["PART_AGG_NODE_PROCESS_BATCH_UNAGGREGATED",
-      "PartitionedAggregationNode12ProcessBatchILb0"],
+   
"_ZN6impala26PartitionedAggregationNode12ProcessBatchILb0EEENS_6StatusEPNS_8RowBatchENS_13TPrefetchMode4typeEPNS_12HashTableCtxE"],
   ["PART_AGG_NODE_PROCESS_BATCH_AGGREGATED",
-      "PartitionedAggregationNode12ProcessBatchILb1"],
+   
"_ZN6impala26PartitionedAggregationNode12ProcessBatchILb1EEENS_6StatusEPNS_8RowBatchENS_13TPrefetchMode4typeEPNS_12HashTableCtxE"],
   ["PART_AGG_NODE_PROCESS_BATCH_NO_GROUPING",
-      "PartitionedAggregationNode22ProcessBatchNoGrouping"],
+   
"_ZN6impala26PartitionedAggregationNode22ProcessBatchNoGroupingEPNS_8RowBatchE"],
   ["PART_AGG_NODE_PROCESS_BATCH_STREAMING",
-      "PartitionedAggregationNode21ProcessBatchStreaming"],
+   
"_ZN6impala26PartitionedAggregationNode21ProcessBatchStreamingEbNS_13TPrefetchMode4typeEPNS_8RowBatchES4_PNS_12HashTableCtxEPi"],
   ["PART_AGG_NODE_GET_EXPR_CTX",
-      "PartitionedAggregationNode17GetAggExprContext"],
-  ["AVG_UPDATE_BIGINT", "9AvgUpdateIN10impala_udf9BigIntVal"],
-  ["AVG_UPDATE_DOUBLE", "9AvgUpdateIN10impala_udf9DoubleVal"],
-  ["AVG_UPDATE_TIMESTAMP", "TimestampAvgUpdate"],
-  ["AVG_UPDATE_DECIMAL", "DecimalAvgUpdate"],
-  ["AVG_MERGE", "8AvgMerge"],
-  ["AVG_MERGE_DECIMAL", "DecimalAvgMerge"],
-  ["CODEGEN_ANYVAL_STRING_VAL_EQ", "StringValEq"],
-  ["CODEGEN_ANYVAL_STRING_VALUE_EQ", "StringValueEq"],
-  ["CODEGEN_ANYVAL_TIMESTAMP_VAL_EQ", "TimestampValEq"],
-  ["CODEGEN_ANYVAL_TIMESTAMP_VALUE_EQ", "TimestampValueEq"],
-  ["EXPR_GET_BOOLEAN_VAL", "4Expr13GetBooleanVal"],
-  ["EXPR_GET_TINYINT_VAL", "4Expr13GetTinyIntVal"],
-  ["EXPR_GET_SMALLINT_VAL", "4Expr14GetSmallIntVal"],
-  ["EXPR_GET_INT_VAL", "4Expr9GetIntVal"],
-  ["EXPR_GET_BIGINT_VAL", "4Expr12GetBigIntVal"],
-  ["EXPR_GET_FLOAT_VAL", "4Expr11GetFloatVal"],
-  ["EXPR_GET_DOUBLE_VAL", "4Expr12GetDoubleVal"],
-  ["EXPR_GET_STRING_VAL", "4Expr12GetStringVal"],
-  ["EXPR_GET_TIMESTAMP_VAL", "4Expr15GetTimestampVal"],
-  ["EXPR_GET_DECIMAL_VAL", "4Expr13GetDecimalVal"],
+   "_ZNK6impala26PartitionedAggregationNode17GetAggExprContextEi"],
+  ["AVG_UPDATE_BIGINT",
+   
"_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["AVG_UPDATE_DOUBLE",
+   
"_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9DoubleValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["AVG_UPDATE_TIMESTAMP",
+   
"_ZN6impala18AggregateFunctions18TimestampAvgUpdateEPN10impala_udf15FunctionContextERKNS1_12TimestampValEPNS1_9StringValE"],
+  ["AVG_UPDATE_DECIMAL",
+   
"_ZN6impala18AggregateFunctions16DecimalAvgUpdateEPN10impala_udf15FunctionContextERKNS1_10DecimalValEPNS1_9StringValE"],
+  ["AVG_MERGE",
+   
"_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_"],
+  ["AVG_MERGE_DECIMAL",
+   
"_ZN6impala18AggregateFunctions15DecimalAvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_"],
+  ["CODEGEN_ANYVAL_STRING_VAL_EQ",
+   "_Z11StringValEqRKN10impala_udf9StringValES2_"],
+  ["CODEGEN_ANYVAL_STRING_VALUE_EQ",
+   "_Z13StringValueEqRKN10impala_udf9StringValERKN6impala11StringValueE"],
+  ["CODEGEN_ANYVAL_TIMESTAMP_VAL_EQ",
+   "_Z14TimestampValEqRKN10impala_udf12TimestampValES2_"],
+  ["CODEGEN_ANYVAL_TIMESTAMP_VALUE_EQ",
+   
"_Z16TimestampValueEqRKN10impala_udf12TimestampValERKN6impala14TimestampValueE"],
+  ["EXPR_GET_BOOLEAN_VAL",
+   "_ZN6impala4Expr13GetBooleanValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_TINYINT_VAL",
+   "_ZN6impala4Expr13GetTinyIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_SMALLINT_VAL",
+   "_ZN6impala4Expr14GetSmallIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_INT_VAL",
+   "_ZN6impala4Expr9GetIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_BIGINT_VAL",
+   "_ZN6impala4Expr12GetBigIntValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_FLOAT_VAL",
+   "_ZN6impala4Expr11GetFloatValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_DOUBLE_VAL",
+   "_ZN6impala4Expr12GetDoubleValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_STRING_VAL",
+   "_ZN6impala4Expr12GetStringValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_TIMESTAMP_VAL",
+   "_ZN6impala4Expr15GetTimestampValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
+  ["EXPR_GET_DECIMAL_VAL",
+   "_ZN6impala4Expr13GetDecimalValEPS0_PNS_11ExprContextEPKNS_8TupleRowE"],
   ["HASH_CRC", "IrCrcHash"],
   ["HASH_FNV", "IrFnvHash"],
   ["HASH_MURMUR", "IrMurmurHash"],
-  ["HASH_JOIN_PROCESS_BUILD_BATCH", "12HashJoinNode17ProcessBuildBatch"],
-  ["HASH_JOIN_PROCESS_PROBE_BATCH", "12HashJoinNode17ProcessProbeBatch"],
-  ["PHJ_PROCESS_BUILD_BATCH", "10PhjBuilder17ProcessBuildBatch"],
-  ["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN", "ProcessProbeBatchILi0"],
-  ["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN", "ProcessProbeBatchILi1"],
-  ["PHJ_PROCESS_PROBE_BATCH_LEFT_SEMI_JOIN", "ProcessProbeBatchILi2"],
-  ["PHJ_PROCESS_PROBE_BATCH_LEFT_ANTI_JOIN", "ProcessProbeBatchILi3"],
-  ["PHJ_PROCESS_PROBE_BATCH_NULL_AWARE_LEFT_ANTI_JOIN", 
"ProcessProbeBatchILi4"],
-  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_OUTER_JOIN", "ProcessProbeBatchILi5"],
-  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_SEMI_JOIN", "ProcessProbeBatchILi6"],
-  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_ANTI_JOIN", "ProcessProbeBatchILi7"],
-  ["PHJ_PROCESS_PROBE_BATCH_FULL_OUTER_JOIN", "ProcessProbeBatchILi8"],
-  ["PHJ_INSERT_BATCH", "10PhjBuilder9Partition11InsertBatch"],
-  ["HASH_TABLE_GET_HASH_SEED", "GetHashSeed"],
-  ["HASH_TABLE_GET_BUILD_EXPR_CTX", "HashTableCtx15GetBuildExprCtx"],
-  ["HASH_TABLE_GET_PROBE_EXPR_CTX", "HashTableCtx15GetProbeExprCtx"],
-  ["HLL_UPDATE_BOOLEAN", "HllUpdateIN10impala_udf10BooleanVal"],
-  ["HLL_UPDATE_TINYINT", "HllUpdateIN10impala_udf10TinyIntVal"],
-  ["HLL_UPDATE_SMALLINT", "HllUpdateIN10impala_udf11SmallIntVal"],
-  ["HLL_UPDATE_INT", "HllUpdateIN10impala_udf6IntVal"],
-  ["HLL_UPDATE_BIGINT", "HllUpdateIN10impala_udf9BigIntVal"],
-  ["HLL_UPDATE_FLOAT", "HllUpdateIN10impala_udf8FloatVal"],
-  ["HLL_UPDATE_DOUBLE", "HllUpdateIN10impala_udf9DoubleVal"],
-  ["HLL_UPDATE_STRING", "HllUpdateIN10impala_udf9StringVal"],
-  ["HLL_UPDATE_TIMESTAMP", "HllUpdateIN10impala_udf12TimestampVal"],
-  ["HLL_UPDATE_DECIMAL", "HllUpdateIN10impala_udf10DecimalVal"],
-  ["HLL_MERGE", "HllMerge"],
-  ["DECODE_AVRO_DATA", "DecodeAvroData"],
-  ["READ_UNION_TYPE", "ReadUnionType"],
-  ["READ_AVRO_BOOLEAN", "ReadAvroBoolean"],
-  ["READ_AVRO_INT32", "ReadAvroInt32"],
-  ["READ_AVRO_INT64", "ReadAvroInt64"],
-  ["READ_AVRO_FLOAT", "ReadAvroFloat"],
-  ["READ_AVRO_DOUBLE", "ReadAvroDouble"],
-  ["READ_AVRO_STRING", "ReadAvroString"],
-  ["READ_AVRO_VARCHAR", "ReadAvroVarchar"],
-  ["READ_AVRO_CHAR", "ReadAvroChar"],
-  ["READ_AVRO_DECIMAL", "ReadAvroDecimal"],
-  ["HDFS_SCANNER_WRITE_ALIGNED_TUPLES", "WriteAlignedTuples"],
-  ["HDFS_SCANNER_GET_CONJUNCT_CTX", "GetConjunctCtx"],
-  ["PROCESS_SCRATCH_BATCH", "ProcessScratchBatch"],
+  ["HASH_JOIN_PROCESS_BUILD_BATCH",
+   "_ZN6impala12HashJoinNode17ProcessBuildBatchEPNS_8RowBatchE"],
+  ["HASH_JOIN_PROCESS_PROBE_BATCH",
+   "_ZN6impala12HashJoinNode17ProcessProbeBatchEPNS_8RowBatchES2_i"],
+  ["PHJ_PROCESS_BUILD_BATCH",
+   
"_ZN6impala10PhjBuilder17ProcessBuildBatchEPNS_8RowBatchEPNS_12HashTableCtxEb"],
+  ["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi0EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi1EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_LEFT_SEMI_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi2EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_LEFT_ANTI_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi3EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_NULL_AWARE_LEFT_ANTI_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi4EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_OUTER_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi5EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_SEMI_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi6EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_RIGHT_ANTI_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi7EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_PROCESS_PROBE_BATCH_FULL_OUTER_JOIN",
+   
"_ZN6impala23PartitionedHashJoinNode17ProcessProbeBatchILi8EEEiNS_13TPrefetchMode4typeEPNS_8RowBatchEPNS_12HashTableCtxEPNS_6StatusE"],
+  ["PHJ_INSERT_BATCH",
+   
"_ZN6impala10PhjBuilder9Partition11InsertBatchENS_13TPrefetchMode4typeEPNS_12HashTableCtxEPNS_8RowBatchERKSt6vectorINS_19BufferedTupleStream6RowIdxESaISA_EE"],
+  ["HASH_TABLE_GET_HASH_SEED",
+   "_ZNK6impala12HashTableCtx11GetHashSeedEv"],
+  ["HASH_TABLE_GET_BUILD_EXPR_CTX",
+   "_ZNK6impala12HashTableCtx15GetBuildExprCtxEi"],
+  ["HASH_TABLE_GET_PROBE_EXPR_CTX",
+   "_ZNK6impala12HashTableCtx15GetProbeExprCtxEi"],
+  ["HLL_UPDATE_BOOLEAN",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf10BooleanValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_TINYINT",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf10TinyIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_SMALLINT",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf11SmallIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_INT",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf6IntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_BIGINT",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_FLOAT",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf8FloatValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_DOUBLE",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf9DoubleValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_STRING",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf9StringValEEEvPNS2_15FunctionContextERKT_PS3_"],
+  ["HLL_UPDATE_TIMESTAMP",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf12TimestampValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_UPDATE_DECIMAL",
+   
"_ZN6impala18AggregateFunctions9HllUpdateIN10impala_udf10DecimalValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE"],
+  ["HLL_MERGE",
+   
"_ZN6impala18AggregateFunctions8HllMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_"],
+  ["DECODE_AVRO_DATA",
+   
"_ZN6impala15HdfsAvroScanner14DecodeAvroDataEiPNS_7MemPoolEPPhS3_PNS_5TupleEPNS_8TupleRowE"],
+  ["READ_UNION_TYPE",
+   "_ZN6impala15HdfsAvroScanner13ReadUnionTypeEiPPhS1_Pb"],
+  ["READ_AVRO_BOOLEAN",
+   
"_ZN6impala15HdfsAvroScanner15ReadAvroBooleanENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_INT32",
+   
"_ZN6impala15HdfsAvroScanner13ReadAvroInt32ENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_INT64",
+   
"_ZN6impala15HdfsAvroScanner13ReadAvroInt64ENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_FLOAT",
+   
"_ZN6impala15HdfsAvroScanner13ReadAvroFloatENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_DOUBLE",
+   
"_ZN6impala15HdfsAvroScanner14ReadAvroDoubleENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_STRING",
+   
"_ZN6impala15HdfsAvroScanner14ReadAvroStringENS_13PrimitiveTypeEPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_VARCHAR",
+   
"_ZN6impala15HdfsAvroScanner15ReadAvroVarcharENS_13PrimitiveTypeEiPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_CHAR",
+   
"_ZN6impala15HdfsAvroScanner12ReadAvroCharENS_13PrimitiveTypeEiPPhS2_bPvPNS_7MemPoolE"],
+  ["READ_AVRO_DECIMAL",
+   "_ZN6impala15HdfsAvroScanner15ReadAvroDecimalEiPPhS1_bPvPNS_7MemPoolE"],
+  ["HDFS_SCANNER_WRITE_ALIGNED_TUPLES",
+   
"_ZN6impala11HdfsScanner18WriteAlignedTuplesEPNS_7MemPoolEPNS_8TupleRowEiPNS_13FieldLocationEiiii"],
+  ["HDFS_SCANNER_GET_CONJUNCT_CTX",
+   "_ZNK6impala11HdfsScanner14GetConjunctCtxEi"],
+  ["PROCESS_SCRATCH_BATCH",
+   "_ZN6impala18HdfsParquetScanner19ProcessScratchBatchEPNS_8RowBatchE"],
   ["STRING_TO_BOOL", "IrStringToBool"],
-  ["STRING_TO_INT8", "IrStringToInt8"],
+  ["STRING_TO_INT8", 
"_Z14IrStringToInt8PKciPN6impala12StringParser11ParseResultE"],
   ["STRING_TO_INT16", "IrStringToInt16"],
   ["STRING_TO_INT32", "IrStringToInt32"],
   ["STRING_TO_INT64", "IrStringToInt64"],
@@ -129,10 +195,14 @@ ir_functions = [
   ["STRING_TO_DOUBLE", "IrStringToDouble"],
   ["IS_NULL_STRING", "IrIsNullString"],
   ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"],
-  ["RAW_VALUE_COMPARE", "8RawValue7Compare"],
-  ["TOPN_NODE_INSERT_BATCH", "TopNNode11InsertBatch"],
-  ["MEMPOOL_ALLOCATE", "MemPool8AllocateILb0"],
-  ["MEMPOOL_CHECKED_ALLOCATE", "MemPool8AllocateILb1"],
+  ["RAW_VALUE_COMPARE",
+   "_ZN6impala8RawValue7CompareEPKvS2_RKNS_10ColumnTypeE"],
+  ["TOPN_NODE_INSERT_BATCH",
+   "_ZN6impala8TopNNode11InsertBatchEPNS_8RowBatchE"],
+  ["MEMPOOL_ALLOCATE",
+   "_ZN6impala7MemPool8AllocateILb0EEEPhl"],
+  ["MEMPOOL_CHECKED_ALLOCATE",
+   "_ZN6impala7MemPool8AllocateILb1EEEPhl"]
 ]
 
 enums_preamble = '\

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/47b8aa3a/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index b107c51..cc8b46c 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -201,6 +201,15 @@ void LlvmCodeGen::InitializeLlvm(bool load_backend) {
   scoped_ptr<LlvmCodeGen> init_codegen;
   Status status = LlvmCodeGen::CreateFromMemory(&init_pool, "init", 
&init_codegen);
   ParseGVForFunctions(init_codegen->module_, &gv_ref_ir_fns_);
+
+  // Validate the module by verifying that functions for all IRFunction::Type
+  // can be found.
+  for (int i = IRFunction::FN_START; i < IRFunction::FN_END; ++i) {
+    DCHECK(FN_MAPPINGS[i].fn == i);
+    const string& fn_name = FN_MAPPINGS[i].fn_name;
+    DCHECK(init_codegen->module_->getFunction(fn_name) != NULL)
+        << "Failed to find function " << fn_name;
+  }
 }
 
 LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const string& id) :
@@ -210,7 +219,8 @@ LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const string& 
id) :
   is_corrupt_(false),
   is_compiled_(false),
   context_(new llvm::LLVMContext()),
-  module_(NULL) {
+  module_(NULL),
+  loaded_functions_(IRFunction::FN_END, NULL) {
 
   DCHECK(llvm_initialized_) << "Must call LlvmCodeGen::InitializeLlvm first.";
 
@@ -222,8 +232,6 @@ LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const string& 
id) :
   compile_timer_ = ADD_TIMER(&profile_, "CompileTime");
   num_functions_ = ADD_COUNTER(&profile_, "NumFunctions", TUnit::UNIT);
   num_instructions_ = ADD_COUNTER(&profile_, "NumInstructions", TUnit::UNIT);
-
-  loaded_functions_.resize(IRFunction::FN_END);
 }
 
 Status LlvmCodeGen::CreateFromFile(ObjectPool* pool,
@@ -390,49 +398,12 @@ Status LlvmCodeGen::CreateImpalaCodegen(
     return Status("Could not create llvm struct type for StringVal");
   }
 
-  // Fills 'functions' with all the cross-compiled functions that are defined 
in
-  // the module.
-  vector<Function*> functions;
-  for (Function& fn: codegen->module_->functions()) {
-    if (fn.isMaterializable()) functions.push_back(&fn);
-    if (gv_ref_ir_fns_.find(fn.getName()) != gv_ref_ir_fns_.end()) {
-      codegen->MaterializeFunction(&fn);
-    }
-  }
-  int parsed_functions = 0;
-  for (int i = 0; i < functions.size(); ++i) {
-    string fn_name = functions[i]->getName();
-    for (int j = IRFunction::FN_START; j < IRFunction::FN_END; ++j) {
-      // Substring match to match precompiled functions.  The compiled 
function names
-      // will be mangled.
-      // TODO: reconsider this.  Substring match is probably not strict enough 
but
-      // undoing the mangling is no fun either.
-      if (fn_name.find(FN_MAPPINGS[j].fn_name) != string::npos) {
-        // TODO: make this a DCHECK when we resolve IMPALA-2439
-        CHECK(codegen->loaded_functions_[FN_MAPPINGS[j].fn] == NULL)
-            << "Duplicate definition found for function " << 
FN_MAPPINGS[j].fn_name
-            << ": " << fn_name;
-        functions[i]->addFnAttr(Attribute::AlwaysInline);
-        codegen->loaded_functions_[FN_MAPPINGS[j].fn] = functions[i];
-        ++parsed_functions;
-      }
-    }
-  }
-
-  if (parsed_functions != IRFunction::FN_END) {
-    stringstream ss;
-    ss << "Unable to find these precompiled functions: ";
-    bool first = true;
-    for (int i = IRFunction::FN_START; i != IRFunction::FN_END; ++i) {
-      if (codegen->loaded_functions_[i] == NULL) {
-        if (!first) ss << ", ";
-        ss << FN_MAPPINGS[i].fn_name;
-        first = false;
-      }
-    }
-    return Status(ss.str());
+  // Materialize functions implicitly referenced by the global variables.
+  for (const string& fn_name : gv_ref_ir_fns_) {
+    Function* fn = codegen->module_->getFunction(fn_name);
+    DCHECK(fn != NULL);
+    codegen->MaterializeFunction(fn);
   }
-
   return Status::OK();
 }
 
@@ -687,8 +658,19 @@ Function* LlvmCodeGen::GetFunction(const string& symbol) {
 }
 
 Function* LlvmCodeGen::GetFunction(IRFunction::Type ir_type, bool clone) {
-  DCHECK(loaded_functions_[ir_type] != NULL);
   Function* fn = loaded_functions_[ir_type];
+  if (fn == NULL) {
+    DCHECK(FN_MAPPINGS[ir_type].fn == ir_type);
+    const string& fn_name = FN_MAPPINGS[ir_type].fn_name;
+    fn = module_->getFunction(fn_name);
+    if (fn == NULL) {
+      LOG(ERROR) << "Unable to locate function " << fn_name;
+      return NULL;
+    }
+    // Mixing "NoInline" with "AlwaysInline" will lead to compilation failure.
+    if (!fn->hasFnAttribute(Attribute::NoInline)) 
fn->addFnAttr(Attribute::AlwaysInline);
+    loaded_functions_[ir_type] = fn;
+  }
   Status status = MaterializeFunction(fn);
   if (UNLIKELY(!status.ok())) return NULL;
   if (clone) return CloneFunction(fn);

Reply via email to