http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
index ea51a3e..068d6fa 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
@@ -39,7 +39,8 @@ import com.google.common.base.Preconditions;
 public class ScalarType extends Type {
   private final PrimitiveType type_;
 
-  // Only used for type CHAR.
+  // Used for fixed-length types parameterized by size, i.e. CHAR, VARCHAR and
+  // FIXED_UDA_INTERMEDIATE.
   private int len_;
 
   // Only used if type is DECIMAL. -1 (for both) is used to represent a
@@ -98,6 +99,12 @@ public class ScalarType extends Type {
     return type;
   }
 
+  public static ScalarType createFixedUdaIntermediateType(int len) {
+    ScalarType type = new ScalarType(PrimitiveType.FIXED_UDA_INTERMEDIATE);
+    type.len_ = len;
+    return type;
+  }
+
   public static ScalarType createDecimalType() { return DEFAULT_DECIMAL; }
 
   public static ScalarType createDecimalType(int precision) {
@@ -182,6 +189,8 @@ public class ScalarType extends Type {
     } else if (type_ == PrimitiveType.VARCHAR) {
       if (isWildcardVarchar()) return "VARCHAR(*)";
       return "VARCHAR(" + len_ + ")";
+    } else if (type_ == PrimitiveType.FIXED_UDA_INTERMEDIATE) {
+      return "FIXED_UDA_INTERMEDIATE(" + len_ + ")";
     }
     return type_.toString();
   }
@@ -193,6 +202,7 @@ public class ScalarType extends Type {
       case BINARY: return type_.toString();
       case VARCHAR:
       case CHAR:
+      case FIXED_UDA_INTERMEDIATE:
         return type_.toString() + "(" + len_ + ")";
       case DECIMAL:
         return String.format("%s(%s,%s)", type_.toString(), precision_, scale_);
@@ -211,7 +221,8 @@ public class ScalarType extends Type {
     container.types.add(node);
     switch(type_) {
       case VARCHAR:
-      case CHAR: {
+      case CHAR:
+      case FIXED_UDA_INTERMEDIATE: {
         node.setType(TTypeNodeType.SCALAR);
         TScalarType scalarType = new TScalarType();
         scalarType.setType(type_.toThrift());
@@ -287,7 +298,8 @@ public class ScalarType extends Type {
         || type_ == PrimitiveType.BIGINT || type_ == PrimitiveType.FLOAT
         || type_ == PrimitiveType.DOUBLE || type_ == PrimitiveType.DATE
         || type_ == PrimitiveType.DATETIME || type_ == PrimitiveType.TIMESTAMP
-        || type_ == PrimitiveType.CHAR || type_ == PrimitiveType.DECIMAL;
+        || type_ == PrimitiveType.CHAR || type_ == PrimitiveType.DECIMAL
+        || type_ == PrimitiveType.FIXED_UDA_INTERMEDIATE;
   }
 
   @Override
@@ -307,6 +319,7 @@ public class ScalarType extends Type {
   public int getSlotSize() {
     switch (type_) {
       case CHAR:
+      case FIXED_UDA_INTERMEDIATE:
         return len_;
       case DECIMAL: return TypesUtil.getDecimalSlotSize(this);
       default:
@@ -344,7 +357,9 @@ public class ScalarType extends Type {
     if (!(o instanceof ScalarType)) return false;
     ScalarType other = (ScalarType)o;
     if (type_ != other.type_) return false;
-    if (type_ == PrimitiveType.CHAR) return len_ == other.len_;
+    if (type_ == PrimitiveType.CHAR || type_ == PrimitiveType.FIXED_UDA_INTERMEDIATE) {
+      return len_ == other.len_;
+    }
    if (type_ == PrimitiveType.VARCHAR) return len_ == other.len_;
    if (type_ == PrimitiveType.DECIMAL) {
      return precision_ == other.precision_ && scale_ == other.scale_;
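
For reference, a minimal sketch of how the new factory is expected to behave, based only on the
ScalarType changes above. The class name FixedUdaIntermediateExample is hypothetical and not part
of the patch; the assertions mirror the toString(), getSlotSize() and equals() hunks shown here.

  // Minimal sketch, assuming only the ScalarType API added in this patch.
  import org.apache.impala.catalog.ScalarType;

  public class FixedUdaIntermediateExample {
    public static void main(String[] args) {
      // A 16-byte fixed-size intermediate, sized by the caller.
      ScalarType t = ScalarType.createFixedUdaIntermediateType(16);

      // toString() includes the length, like CHAR(n) and VARCHAR(n).
      assert t.toString().equals("FIXED_UDA_INTERMEDIATE(16)");

      // The slot size is simply the declared byte length.
      assert t.getSlotSize() == 16;

      // equals() compares lengths, mirroring the equals() change above.
      assert t.equals(ScalarType.createFixedUdaIntermediateType(16));
      assert !t.equals(ScalarType.createFixedUdaIntermediateType(32));
    }
  }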

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/fe/src/main/java/org/apache/impala/catalog/Type.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/Type.java b/fe/src/main/java/org/apache/impala/catalog/Type.java
index 3061194..edf44f0 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Type.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Type.java
@@ -31,7 +31,6 @@ import org.apache.impala.thrift.TPrimitiveType;
 import org.apache.impala.thrift.TScalarType;
 import org.apache.impala.thrift.TStructField;
 import org.apache.impala.thrift.TTypeNode;
-import org.apache.impala.thrift.TTypeNodeType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -69,6 +68,8 @@ public abstract class Type {
   public static final ScalarType DEFAULT_VARCHAR = ScalarType.createVarcharType(-1);
   public static final ScalarType VARCHAR = ScalarType.createVarcharType(-1);
   public static final ScalarType CHAR = ScalarType.createCharType(-1);
+  public static final ScalarType FIXED_UDA_INTERMEDIATE =
+      ScalarType.createFixedUdaIntermediateType(-1);
 
   private static ArrayList<ScalarType> integerTypes;
   private static ArrayList<ScalarType> numericTypes;
@@ -457,6 +458,7 @@ public abstract class Type {
         return 29;
       case CHAR:
       case VARCHAR:
+      case FIXED_UDA_INTERMEDIATE:
         return t.getLength();
       default:
         return null;
@@ -577,6 +579,7 @@ public abstract class Type {
       case VARCHAR: return java.sql.Types.VARCHAR;
       case BINARY: return java.sql.Types.BINARY;
       case DECIMAL: return java.sql.Types.DECIMAL;
+      case FIXED_UDA_INTERMEDIATE: return java.sql.Types.BINARY;
       default:
         Preconditions.checkArgument(false, "Invalid primitive type " +
             t.getPrimitiveType().name());
@@ -620,6 +623,14 @@ public abstract class Type {
       // BINARY is not supported.
       compatibilityMatrix[BINARY.ordinal()][i] = PrimitiveType.INVALID_TYPE;
       compatibilityMatrix[i][BINARY.ordinal()] = PrimitiveType.INVALID_TYPE;
+
+      // FIXED_UDA_INTERMEDIATE cannot be cast to/from another type
+      if (i != FIXED_UDA_INTERMEDIATE.ordinal()) {
+        compatibilityMatrix[FIXED_UDA_INTERMEDIATE.ordinal()][i] =
+            PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[i][FIXED_UDA_INTERMEDIATE.ordinal()] =
+            PrimitiveType.INVALID_TYPE;
+      }
     }
 
     compatibilityMatrix[BOOLEAN.ordinal()][TINYINT.ordinal()] = PrimitiveType.TINYINT;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/fe/src/main/java/org/apache/impala/service/MetadataOp.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/MetadataOp.java b/fe/src/main/java/org/apache/impala/service/MetadataOp.java
index 6883bac..8887908 100644
--- a/fe/src/main/java/org/apache/impala/service/MetadataOp.java
+++ b/fe/src/main/java/org/apache/impala/service/MetadataOp.java
@@ -569,7 +569,8 @@ public class MetadataOp {
           ptype.equals(PrimitiveType.DATETIME) ||
           ptype.equals(PrimitiveType.DECIMAL) ||
           ptype.equals(PrimitiveType.CHAR) ||
-          ptype.equals(PrimitiveType.VARCHAR)) {
+          ptype.equals(PrimitiveType.VARCHAR) ||
+          ptype.equals(PrimitiveType.FIXED_UDA_INTERMEDIATE)) {
         continue;
       }
       Type type = ScalarType.createType(ptype);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 2a3e383..9e2ffd7 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -3127,6 +3127,9 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     AnalysisError("create aggregate function foo(int) RETURNS struct<f:int> " + loc +
         "UPDATE_FN='AggUpdate'",
         "Type 'STRUCT<f:INT>' is not supported in UDFs/UDAs.");
+    AnalysisError("create aggregate function foo(int) RETURNS int " +
+        "INTERMEDIATE fixed_uda_intermediate(10) " + loc + " UPDATE_FN='foo'",
+        "Syntax error in line 1");
 
     // Test missing .ll file. TODO: reenable when we can run IR UDAs
     AnalysisError("create aggregate function foo(int) RETURNS int LOCATION " +

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/fe/src/test/java/org/apache/impala/service/FrontendTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/service/FrontendTest.java b/fe/src/test/java/org/apache/impala/service/FrontendTest.java
index d303315..938b00e 100644
--- a/fe/src/test/java/org/apache/impala/service/FrontendTest.java
+++ b/fe/src/test/java/org/apache/impala/service/FrontendTest.java
@@ -141,9 +141,9 @@ public class FrontendTest extends FrontendTestBase {
     assertEquals(18, resp.schema.columns.size());
     assertEquals(18, resp.rows.get(0).colVals.size());
     // All primitives types, except INVALID_TYPE, DATE, DATETIME, DECIMAL, CHAR,
-    // and VARCHAR should be returned.
-    // Therefore #supported types = PrimitiveType.values().length - 6.
-    assertEquals(PrimitiveType.values().length - 6, resp.rows.size());
+    // VARCHAR, and FIXED_UDA_INTERMEDIATE should be returned.
+    // Therefore #supported types = PrimitiveType.values().length - 7.
+    assertEquals(PrimitiveType.values().length - 7, resp.rows.size());
   }
 
   @Test
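
To make the casting rule added to Type.java above concrete, here is a small self-contained sketch
of the compatibility-matrix pattern. The enum P and the naive default rule are simplified
stand-ins, not Impala's PrimitiveType or its real compatibility rules; only the final loop mirrors
the logic introduced by this patch.

  // Standalone sketch; P and the default rule below are invented for illustration.
  public class CompatibilityMatrixSketch {
    enum P { BOOLEAN, TINYINT, INT, BIGINT, STRING, FIXED_UDA_INTERMEDIATE, INVALID_TYPE }

    public static void main(String[] args) {
      int n = P.values().length;
      P[][] matrix = new P[n][n];

      // Naive default: the "wider" of the two ordinals wins (stand-in only).
      for (P a : P.values())
        for (P b : P.values())
          matrix[a.ordinal()][b.ordinal()] = a.ordinal() >= b.ordinal() ? a : b;

      // Mirrors the loop added to Type.java: the FIXED_UDA_INTERMEDIATE row and
      // column are forced to INVALID_TYPE, so the type can never be implicitly
      // cast to or from any other type.
      for (P p : P.values()) {
        if (p == P.FIXED_UDA_INTERMEDIATE) continue;
        matrix[P.FIXED_UDA_INTERMEDIATE.ordinal()][p.ordinal()] = P.INVALID_TYPE;
        matrix[p.ordinal()][P.FIXED_UDA_INTERMEDIATE.ordinal()] = P.INVALID_TYPE;
      }

      assert matrix[P.INT.ordinal()][P.BIGINT.ordinal()] == P.BIGINT;
      assert matrix[P.STRING.ordinal()][P.FIXED_UDA_INTERMEDIATE.ordinal()] == P.INVALID_TYPE;
      System.out.println("FIXED_UDA_INTERMEDIATE is not castable to or from other types.");
    }
  }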

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test b/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
index 43a297b..99ef97b 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
@@ -516,7 +516,7 @@ select c_nationkey, avg(c_acctbal)
 from tpch_parquet.customer
 group by c_nationkey
 ---- DISTRIBUTEDPLAN
-Max Per-Host Resource Reservation: Memory=1.12MB
+Max Per-Host Resource Reservation: Memory=1.06MB
 Per-Host Resource Estimates: Memory=44.00MB
 
 F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -529,11 +529,11 @@ PLAN-ROOT SINK
 |  tuple-ids=2 row-size=10B cardinality=25
 |
 F01:PLAN FRAGMENT [HASH(c_nationkey)] hosts=1 instances=1
-Per-Host Resources: mem-estimate=10.00MB mem-reservation=1.12MB
+Per-Host Resources: mem-estimate=10.00MB mem-reservation=1.06MB
 03:AGGREGATE [FINALIZE]
 |  output: avg:merge(c_acctbal)
 |  group by: c_nationkey
-|  mem-estimate=10.00MB mem-reservation=1.12MB spill-buffer=64.00KB
+|  mem-estimate=10.00MB mem-reservation=1.06MB spill-buffer=64.00KB
 |  tuple-ids=2 row-size=10B cardinality=25
 |
 02:EXCHANGE [HASH(c_nationkey)]
@@ -556,7 +556,7 @@ Per-Host Resources: mem-estimate=34.00MB mem-reservation=0B
      mem-estimate=24.00MB mem-reservation=0B
      tuple-ids=0 row-size=10B cardinality=150000
 ---- PARALLELPLANS
-Max Per-Host Resource Reservation: Memory=2.25MB
+Max Per-Host Resource Reservation: Memory=2.12MB
 Per-Host Resource Estimates: Memory=88.00MB
 
 F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -569,11 +569,11 @@ PLAN-ROOT SINK
 |  tuple-ids=2 row-size=10B cardinality=25
 |
 F01:PLAN FRAGMENT [HASH(c_nationkey)] hosts=1 instances=2
-Per-Host Resources: mem-estimate=20.00MB mem-reservation=2.25MB
+Per-Host Resources: mem-estimate=20.00MB mem-reservation=2.12MB
 03:AGGREGATE [FINALIZE]
 |  output: avg:merge(c_acctbal)
 |  group by: c_nationkey
-|  mem-estimate=10.00MB mem-reservation=1.12MB spill-buffer=64.00KB
+|  mem-estimate=10.00MB mem-reservation=1.06MB spill-buffer=64.00KB
 |  tuple-ids=2 row-size=10B cardinality=25
 |
 02:EXCHANGE [HASH(c_nationkey)]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
index eb4646c..fed59b9 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
@@ -1,11 +1,6 @@
 ====
 ---- QUERY
 # TODO: IMPALA-3350: Add 'group by' to these tests to exercise different code paths.
-select ndv(string_col) from functional.alltypes
----- CATCH
-FunctionContext::Allocate() failed to allocate 1024 bytes.
-====
----- QUERY
 select min(string_col) from functional.alltypes
 ---- CATCH
 FunctionContext::Allocate() failed to allocate 1 bytes.
@@ -16,46 +11,16 @@ select max(string_col) from functional.alltypes
 FunctionContext::Allocate() failed to allocate 1 bytes.
 ====
 ---- QUERY
-select avg(d1) from functional.decimal_tbl
----- CATCH
-FunctionContext::Allocate() failed to allocate 48 bytes.
-====
----- QUERY
-select avg(double_col) from functional.alltypes
----- CATCH
-FunctionContext::Allocate() failed to allocate 16 bytes.
-====
----- QUERY
-select avg(timestamp_col) from functional.alltypes
----- CATCH
-FunctionContext::Allocate() failed to allocate 16 bytes.
-====
----- QUERY
 select sample(timestamp_col) from functional.alltypes
 ---- CATCH
 FunctionContext::Allocate() failed to allocate 248 bytes.
 ====
 ---- QUERY
-select distinctpc(int_col) from functional.alltypes
----- CATCH
-FunctionContext::Allocate() failed to allocate 256 bytes.
-====
----- QUERY
-select distinctpcsa(string_col) from functional.alltypes
----- CATCH
-FunctionContext::Allocate() failed to allocate 256 bytes.
-====
----- QUERY
 select group_concat(string_col) from functional.alltypes
 ---- CATCH
 FunctionContext::Allocate() failed to allocate 4 bytes.
 ====
 ---- QUERY
-select rank() over (partition by month order by year) from functional.alltypes
----- CATCH
-FunctionContext::Allocate() failed to allocate 16 bytes.
-====
----- QUERY
 select extract(year from timestamp_col) from functional.alltypes limit 10
 ---- CATCH
 FunctionContextImpl::AllocateLocal() failed to allocate 4 bytes.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/testdata/workloads/functional-query/queries/QueryTest/functions-ddl.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/functions-ddl.test b/testdata/workloads/functional-query/queries/QueryTest/functions-ddl.test
index c1a80ff..f629b37 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/functions-ddl.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/functions-ddl.test
@@ -113,39 +113,35 @@ show create aggregate function _impala_builtins.avg
 ---- RESULTS: MULTI_LINE
 ['CREATE AGGREGATE FUNCTION _impala_builtins.avg(BIGINT)
  RETURNS DOUBLE
- INTERMEDIATE STRING
+ INTERMEDIATE FIXED_UDA_INTERMEDIATE(16)
  LOCATION 'null'
  UPDATE_FN='_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9BigIntValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE'
  INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
  MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
- SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
  FINALIZE_FN='_ZN6impala18AggregateFunctions11AvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
 CREATE AGGREGATE FUNCTION _impala_builtins.avg(DECIMAL(*,*))
  RETURNS DECIMAL(*,*)
- INTERMEDIATE STRING
+ INTERMEDIATE FIXED_UDA_INTERMEDIATE(32)
  LOCATION 'null'
  UPDATE_FN='_ZN6impala18AggregateFunctions16DecimalAvgUpdateEPN10impala_udf15FunctionContextERKNS1_10DecimalValEPNS1_9StringValE'
  INIT_FN='_ZN6impala18AggregateFunctions14DecimalAvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
  MERGE_FN='_ZN6impala18AggregateFunctions15DecimalAvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
- SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
  FINALIZE_FN='_ZN6impala18AggregateFunctions18DecimalAvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
 CREATE AGGREGATE FUNCTION _impala_builtins.avg(DOUBLE)
  RETURNS DOUBLE
- INTERMEDIATE STRING
+ INTERMEDIATE FIXED_UDA_INTERMEDIATE(16)
  LOCATION 'null'
  UPDATE_FN='_ZN6impala18AggregateFunctions9AvgUpdateIN10impala_udf9DoubleValEEEvPNS2_15FunctionContextERKT_PNS2_9StringValE'
  INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
  MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
- SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
  FINALIZE_FN='_ZN6impala18AggregateFunctions11AvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
 CREATE AGGREGATE FUNCTION _impala_builtins.avg(TIMESTAMP)
  RETURNS TIMESTAMP
- INTERMEDIATE STRING
+ INTERMEDIATE FIXED_UDA_INTERMEDIATE(16)
  LOCATION 'null'
  UPDATE_FN='_ZN6impala18AggregateFunctions18TimestampAvgUpdateEPN10impala_udf15FunctionContextERKNS1_12TimestampValEPNS1_9StringValE'
  INIT_FN='_ZN6impala18AggregateFunctions7AvgInitEPN10impala_udf15FunctionContextEPNS1_9StringValE'
  MERGE_FN='_ZN6impala18AggregateFunctions8AvgMergeEPN10impala_udf15FunctionContextERKNS1_9StringValEPS4_'
- SERIALIZE_FN='_ZN6impala18AggregateFunctions28StringValSerializeOrFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
  FINALIZE_FN='_ZN6impala18AggregateFunctions20TimestampAvgFinalizeEPN10impala_udf15FunctionContextERKNS1_9StringValE'
 ']
 ---- TYPES

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/testdata/workloads/functional-query/queries/QueryTest/spilling-aggs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling-aggs.test b/testdata/workloads/functional-query/queries/QueryTest/spilling-aggs.test
new file mode 100644
index 0000000..6fe86c3
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/spilling-aggs.test
@@ -0,0 +1,202 @@
+====
+---- QUERY
+set buffer_pool_limit=10m;
+select l_orderkey, count(*)
+from lineitem
+group by 1
+order by 1 limit 10
+---- RESULTS
+1,6
+2,1
+3,6
+4,1
+5,3
+6,1
+7,7
+32,6
+33,4
+34,3
+---- TYPES
+BIGINT, BIGINT
+---- RUNTIME_PROFILE
+# Verify that spilling and passthrough were activated.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test query with string grouping column and string agg columns
+set buffer_pool_limit=10m;
+set num_nodes=1;
+select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
+from lineitem
+group by 1,2
+order by 1,2 limit 3
+---- RESULTS
+'A',3,0.05,'RAIL'
+'A',5,0.03,'AIR'
+'A',6,0.03,'TRUCK'
+---- TYPES
+STRING, BIGINT, DECIMAL, STRING
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
+====
+---- QUERY
+set buffer_pool_limit=10m;
+select l_orderkey, count(*)
+from lineitem
+group by 1
+order by 1 limit 10;
+---- RESULTS
+1,6
+2,1
+3,6
+4,1
+5,3
+6,1
+7,7
+32,6
+33,4
+34,3
+---- TYPES
+BIGINT, BIGINT
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test query with string grouping column
+set buffer_pool_limit=10m;
+set num_nodes=1;
+select l_comment, count(*)
+from lineitem
+group by 1
+order by count(*) desc limit 5
+---- RESULTS
+' furiously',943
+' carefully',893
+' carefully ',875
+'carefully ',854
+' furiously ',845
+---- TYPES
+STRING, BIGINT
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test query with string grouping column and string agg columns
+set buffer_pool_limit=10m;
+set num_nodes=1;
+select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
+from lineitem
+group by 1,2
+order by 1,2 limit 3;
+---- RESULTS
+'A',3,0.05,'RAIL'
+'A',5,0.03,'AIR'
+'A',6,0.03,'TRUCK'
+---- TYPES
+STRING, BIGINT, DECIMAL, STRING
+---- RUNTIME_PROFILE
+# Verify that spilling happened in the agg.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test with non-scalar intermediate state (avg() uses fixed intermediate value).
+set buffer_pool_limit=10m;
+select l_orderkey, avg(l_orderkey)
+from lineitem
+group by 1
+order by 1 limit 5
+---- RESULTS
+1,1
+2,2
+3,3
+4,4
+5,5
+---- TYPES
+BIGINT, DOUBLE
+---- RUNTIME_PROFILE
+# Verify that passthrough and spilling happened in the pre and merge agg.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test aggregation spill with group_concat distinct
+set buffer_pool_limit=50m;
+select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
+from lineitem
+group by 1
+order by 1 limit 10
+---- RESULTS
+1,6,'O'
+2,1,'O'
+3,6,'F'
+4,1,'O'
+5,3,'F'
+6,1,'F'
+7,7,'O'
+32,6,'O'
+33,4,'F'
+34,3,'O'
+---- TYPES
+BIGINT, BIGINT, STRING
+---- RUNTIME_PROFILE
+# Verify that at least one of the aggs spilled.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Regression test for IMPALA-2612. The following query will cause CastToChar
+# to be invoked when building the hash tables in partitioned aggregation
+# nodes. CastToChar will do "local" memory allocation. Without the fix of
+# IMPALA-2612, the peak memory consumption will be higher.
+set mem_limit=800m;
+set num_scanner_threads=1;
+select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
+    cast(l_comment as char(120)), cast(l_comment as char(120)),
+    cast(l_comment as char(120)), cast(l_comment as char(120))))
+from lineitem
+---- RESULTS
+4502054
+---- TYPES
+BIGINT
+---- RUNTIME_PROFILE
+# Verify that the agg spilled.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# Test aggregation with minimum required reservation to exercise IMPALA-2708.
+# Merge aggregation requires 17 buffers. The buffer size is 256k for this test.
+set buffer_pool_limit=4352k;
+select count(*)
+from (select distinct * from orders) t
+---- TYPES
+BIGINT
+---- RESULTS
+1500000
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+====
+---- QUERY
+# IMPALA-3304: test that avg() can spill with a query mem limit.
+# This test only covers functions that use FIXED_UDA_INTERMEDIATE, not functions
+# that allocate strings for intermediate values.
+set mem_limit=200m;
+select l_orderkey, avg(l_tax), avg(l_quantity), avg(l_discount), avg(l_extendedprice)
+from tpch_parquet.lineitem
+group by 1
+order by avg(l_extendedprice) desc, avg(l_discount)
+limit 5
+---- TYPES
+BIGINT,DECIMAL,DECIMAL,DECIMAL,DECIMAL
+---- RESULTS
+3811460,0.05,50.00,0.05,104899.50
+1744195,0.04,50.00,0.09,104649.50
+5151266,0.07,50.00,0.00,104449.50
+4571042,0.03,50.00,0.09,104399.50
+1198304,0.01,50.00,0.02,104299.50
+---- RUNTIME_PROFILE
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/testdata/workloads/functional-query/queries/QueryTest/spilling.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling.test b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
index 2f81a9a..b6f4f12 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
@@ -1,129 +1,5 @@
 ====
 ---- QUERY
-set buffer_pool_limit=10m;
-select l_orderkey, count(*)
-from lineitem
-group by 1
-order by 1 limit 10
----- RESULTS
-1,6
-2,1
-3,6
-4,1
-5,3
-6,1
-7,7
-32,6
-33,4
-34,3
----- TYPES
-BIGINT, BIGINT
----- RUNTIME_PROFILE
-# Verify that spilling and passthrough were activated.
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
-====
----- QUERY
-# Test query with string grouping column and string agg columns
-set buffer_pool_limit=10m;
-set num_nodes=1;
-select l_returnflag, l_orderkey, avg(l_tax), min(l_shipmode)
-from lineitem
-group by 1,2
-order by 1,2 limit 3
----- RESULTS
-'A',3,0.05,'RAIL'
-'A',5,0.03,'AIR'
-'A',6,0.03,'TRUCK'
----- TYPES
-STRING, BIGINT, DECIMAL, STRING
----- RUNTIME_PROFILE
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
-====
----- QUERY
-set buffer_pool_limit=10m;
-select l_orderkey, count(*)
-from lineitem
-group by 1
-order by 1 limit 10;
----- RESULTS
-1,6
-2,1
-3,6
-4,1
-5,3
-6,1
-7,7
-32,6
-33,4
-34,3
----- TYPES
-BIGINT, BIGINT
----- RUNTIME_PROFILE
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
-====
----- QUERY
-# Test query with string grouping column
-set buffer_pool_limit=10m;
-set num_nodes=1;
-select l_comment, count(*)
-from lineitem
-group by 1
-order by count(*) desc limit 5
----- RESULTS
-' furiously',943
-' carefully',893
-' carefully ',875
-'carefully ',854
-' furiously ',845
----- TYPES
-STRING, BIGINT
----- RUNTIME_PROFILE
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
-====
----- QUERY
-# Test query with string grouping column and string agg columns
-set buffer_pool_limit=10m;
-set num_nodes=1;
-select l_returnflag, l_orderkey, round(avg(l_tax),2), min(l_shipmode)
-from lineitem
-group by 1,2
-order by 1,2 limit 3;
----- RESULTS
-'A',3,0.05,'RAIL'
-'A',5,0.03,'AIR'
-'A',6,0.03,'TRUCK'
----- TYPES
-STRING, BIGINT, DECIMAL, STRING
----- RUNTIME_PROFILE
-# Verify that spilling happened in the agg.
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-row_regex: .*NumRepartitions: .* \([1-9][0-9]*\)
-====
----- QUERY
-# Test with string intermediate state (avg() uses string intermediate value).
-set buffer_pool_limit=10m;
-select l_orderkey, avg(l_orderkey)
-from lineitem
-group by 1
-order by 1 limit 5
----- RESULTS
-1,1
-2,2
-3,3
-4,4
-5,5
----- TYPES
-BIGINT, DOUBLE
----- RUNTIME_PROFILE
-# Verify that passthrough and spilling happened in the pre and merge agg.
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-row_regex: .*RowsPassedThrough: .* \([1-9][0-9]*\)
-====
----- QUERY
 set buffer_pool_limit=15m;
 select count(l1.l_tax)
 from
@@ -333,49 +209,6 @@ string, bigint
 row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
 ====
 ---- QUERY
-# Test aggregation spill with group_concat distinct
-set buffer_pool_limit=50m;
-select l_orderkey, count(*), group_concat(distinct l_linestatus, '|')
-from lineitem
-group by 1
-order by 1 limit 10
----- RESULTS
-1,6,'O'
-2,1,'O'
-3,6,'F'
-4,1,'O'
-5,3,'F'
-6,1,'F'
-7,7,'O'
-32,6,'O'
-33,4,'F'
-34,3,'O'
----- TYPES
-BIGINT, BIGINT, STRING
----- RUNTIME_PROFILE
-# Verify that at least one of the aggs spilled.
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-====
----- QUERY
-# Regression test for IMPALA-2612. The following query will cause CastToChar
-# to be invoked when building the hash tables in partitioned aggregation
-# nodes. CastToChar will do "local" memory allocation. Without the fix of
-# IMPALA-2612, the peak memory consumption will be higher.
-set mem_limit=800m;
-set num_scanner_threads=1;
-select count(distinct concat(cast(l_comment as char(120)), cast(l_comment as char(120)),
-    cast(l_comment as char(120)), cast(l_comment as char(120)),
-    cast(l_comment as char(120)), cast(l_comment as char(120))))
-from lineitem
----- RESULTS
-4502054
----- TYPES
-BIGINT
----- RUNTIME_PROFILE
-# Verify that the agg spilled.
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-====
----- QUERY
 # IMPALA-1346/IMPALA-1546: fix sorter memory management so that it can complete
 # successfully when in same pipeline as a spilling join.
 set buffer_pool_limit=50m;
@@ -510,16 +343,3 @@ bigint,bigint,bigint,int,decimal,decimal,decimal,decimal,string,string,string,st
 1382,156162,6163,5,31.00,37762.96,0.07,0.03,'R','F','1993-10-26','1993-10-15','1993-11-09','TAKE BACK RETURN','FOB','hely regular dependencies. f'
 1509,186349,3904,6,31.00,44495.54,0.04,0.03,'A','F','1993-07-14','1993-08-21','1993-08-06','COLLECT COD','SHIP','ic deposits cajole carefully. quickly bold '
 ====
----- QUERY
-# Test aggregation with minimum required reservation to exercise IMPALA-2708.
-# Merge aggregation requires 17 buffers. The buffer size is 256k for this test.
-set buffer_pool_limit=4352k;
-select count(*)
-from (select distinct * from orders) t
----- TYPES
-BIGINT
----- RESULTS
-1500000
----- RUNTIME_PROFILE
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/testdata/workloads/functional-query/queries/QueryTest/uda.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/uda.test b/testdata/workloads/functional-query/queries/QueryTest/uda.test
index 932b94a..ef3f1a6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/uda.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/uda.test
@@ -139,4 +139,26 @@ group by
 100,NULL,NULL,99,5.4994
 ---- TYPES
 decimal,decimal,bigint,bigint,decimal
-====
\ No newline at end of file
+====
+---- QUERY
+# Test that char intermediate works as expected. The function char_intermediate_sum()
+# computes the sum with an intermediate int.
+select year, month, day, char_intermediate_sum(int_col), sum(int_col)
+from functional.alltypesagg
+group by year, month, day
+order by year, month, day
+---- RESULTS
+2010,1,1,499500,499500
+2010,1,2,499500,499500
+2010,1,3,499500,499500
+2010,1,4,499500,499500
+2010,1,5,499500,499500
+2010,1,6,499500,499500
+2010,1,7,499500,499500
+2010,1,8,499500,499500
+2010,1,9,499500,499500
+2010,1,10,499500,499500
+2010,1,NULL,495000,495000
+---- TYPES
+int,int,int,int,bigint
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/tests/query_test/test_spilling.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_spilling.py b/tests/query_test/test_spilling.py
index ce6f446..ee366d4 100644
--- a/tests/query_test/test_spilling.py
+++ b/tests/query_test/test_spilling.py
@@ -49,6 +49,9 @@ class TestSpilling(ImpalaTestSuite):
   def test_spilling(self, vector):
     self.run_test_case('QueryTest/spilling', vector)
 
+  def test_spilling_aggs(self, vector):
+    self.run_test_case('QueryTest/spilling-aggs', vector)
+
   def test_spilling_sorts_exhaustive(self, vector):
     if self.exploration_strategy() != 'exhaustive':
       pytest.skip("only run large sorts on exhaustive")

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/852e1bb7/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index eab5994..37d689f 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -109,6 +109,11 @@ create aggregate function {database}.agg_string_intermediate(decimal(20,10), big
 returns decimal(20,0) intermediate string location '{location}'
 init_fn='AggStringIntermediateInit' update_fn='AggStringIntermediateUpdate'
 merge_fn='AggStringIntermediateMerge' finalize_fn='AggStringIntermediateFinalize';
+
+create aggregate function {database}.char_intermediate_sum(int) returns int
+intermediate char(10) LOCATION '{location}' update_fn='AggCharIntermediateUpdate'
+init_fn='AggCharIntermediateInit' merge_fn='AggCharIntermediateMerge'
+serialize_fn='AggCharIntermediateSerialize' finalize_fn='AggCharIntermediateFinalize';
 """
 
 # Create test UDF functions in {database} from library {location}
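
Finally, a brief hypothetical sketch tying the new frontend factory to the SHOW CREATE FUNCTION
output in functions-ddl.test above. The 16- and 32-byte sizes are taken from that golden output,
and with a fixed-size intermediate the builtin avg() no longer lists a SERIALIZE_FN, which matches
the lines dropped from that output. The class and variable names here are invented for
illustration; only createFixedUdaIntermediateType() and toString() from this patch are assumed.

  // Hypothetical example; sizes come from the functions-ddl.test golden output.
  import org.apache.impala.catalog.ScalarType;

  public class BuiltinAvgIntermediates {
    public static void main(String[] args) {
      // avg(BIGINT/DOUBLE/TIMESTAMP) use a 16-byte fixed intermediate,
      // avg(DECIMAL) a 32-byte one.
      ScalarType avgIntermediate = ScalarType.createFixedUdaIntermediateType(16);
      ScalarType decimalAvgIntermediate = ScalarType.createFixedUdaIntermediateType(32);

      // Prints: INTERMEDIATE FIXED_UDA_INTERMEDIATE(16)
      System.out.println("INTERMEDIATE " + avgIntermediate);
      // Prints: INTERMEDIATE FIXED_UDA_INTERMEDIATE(32)
      System.out.println("INTERMEDIATE " + decimalAvgIntermediate);
    }
  }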
