http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/java-udf.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test index c473fdf..c4e2f1c 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test +++ b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test @@ -1,20 +1,20 @@ ==== ---- QUERY -select udf_test.hive_pi() +select hive_pi() ---- RESULTS 3.141592653589793 ---- TYPES DOUBLE ==== ---- QUERY -select udf_test.hive_bin(100) +select hive_bin(100) ---- RESULTS '1100100' ---- TYPES STRING ==== ---- QUERY -select min(udf_test.hive_pi()) from functional.alltypesagg +select min(hive_pi()) from functional.alltypesagg ---- RESULTS 3.141592653589793 ---- TYPES @@ -22,49 +22,49 @@ DOUBLE ==== ---- QUERY # Test identity functions -select udf_test.identity(true); +select identity(true); ---- TYPES boolean ---- RESULTS true ==== ---- QUERY -select udf_test.identity(cast(10 as tinyint)); +select identity(cast(10 as tinyint)); ---- TYPES tinyint ---- RESULTS 10 ==== ---- QUERY -select udf_test.identity(cast(10 as smallint)); +select identity(cast(10 as smallint)); ---- TYPES smallint ---- RESULTS 10 ==== ---- QUERY -select udf_test.identity(cast(10 as int)); +select identity(cast(10 as int)); ---- TYPES int ---- RESULTS 10 ==== ---- QUERY -select udf_test.identity(cast(10 as bigint)); +select identity(cast(10 as bigint)); ---- TYPES bigint ---- RESULTS 10 ==== ---- QUERY -select udf_test.identity(cast(10.0 as float)); +select identity(cast(10.0 as float)); ---- TYPES float ---- RESULTS 10 ==== ---- QUERY -select udf_test.identity(cast(10.0 as double)); +select identity(cast(10.0 as double)); ---- TYPES double ---- RESULTS @@ -73,16 +73,16 @@ double ---- QUERY # IMPALA-1456. Each "identity" call below tests a different type (BytesWritable, Text, # and String). -select udf_test.identity("why hello there"), - udf_test.identity("why", " hello there"), - udf_test.identity("why", " hello", " there"); +select identity("why hello there"), + identity("why", " hello there"), + identity("why", " hello", " there"); ---- TYPES string, string, string ---- RESULTS 'why hello there','why hello there','why hello there' ==== ---- QUERY -select udf_test.identity(NULL); +select identity(NULL); ---- TYPES boolean ---- RESULTS @@ -91,9 +91,9 @@ NULL ---- QUERY # IMPALA-1134. Each "identity" call below tests a different type (BytesWritable, Text, # and String). The different types are handled slightly differently. -select length(udf_test.identity("0123456789")), - length(udf_test.identity("0123456789", "0123456789")), - length(udf_test.identity("0123456789", "0123456789", "0123456789")); +select length(identity("0123456789")), + length(identity("0123456789", "0123456789")), + length(identity("0123456789", "0123456789", "0123456789")); ---- TYPES int, int, int ---- RESULTS @@ -101,14 +101,14 @@ int, int, int ==== ---- QUERY # IMPALA-1392: Hive UDFs that throw exceptions should return NULL -select udf_test.throws_exception(); +select throws_exception(); ---- TYPES boolean ---- RESULTS NULL ==== ---- QUERY -select udf_test.throws_exception() from functional.alltypestiny; +select throws_exception() from functional.alltypestiny; ---- TYPES boolean ---- RESULTS @@ -122,49 +122,49 @@ NULL NULL ==== ---- QUERY -select udf_test.hive_add(cast(1 as int), cast(2 as int)); +select hive_add(cast(1 as int), cast(2 as int)); ---- TYPES int ---- RESULTS 3 ==== ---- QUERY -select udf_test.hive_add(udf_test.hive_add(cast(1 as int), cast(2 as int)), cast(2 as int)); +select hive_add(hive_add(cast(1 as int), cast(2 as int)), cast(2 as int)); ---- TYPES int ---- RESULTS 5 ==== ---- QUERY -select udf_test.hive_add(cast(udf_test.hive_add(cast(1 as int), cast(2 as int)) - udf_test.hive_add(cast(2 as int), cast(1 as int)) as int), cast(2 as int)); +select hive_add(cast(hive_add(cast(1 as int), cast(2 as int)) - hive_add(cast(2 as int), cast(1 as int)) as int), cast(2 as int)); ---- TYPES int ---- RESULTS 2 ==== ---- QUERY -select udf_test.hive_add(cast(1 as smallint), cast(2 as smallint)); +select hive_add(cast(1 as smallint), cast(2 as smallint)); ---- TYPES smallint ---- RESULTS 3 ==== ---- QUERY -select udf_test.hive_add(cast(1.0 as float), cast(2.0 as float)); +select hive_add(cast(1.0 as float), cast(2.0 as float)); ---- TYPES float ---- RESULTS 3.0 ==== ---- QUERY -select udf_test.hive_add(cast(1.0 as double), cast(2.0 as double)); +select hive_add(cast(1.0 as double), cast(2.0 as double)); ---- TYPES double ---- RESULTS 3.0 ==== ---- QUERY -select udf_test.hive_add(cast(1 as boolean), cast(0 as boolean)); +select hive_add(cast(1 as boolean), cast(0 as boolean)); ---- TYPES boolean ---- RESULTS @@ -172,63 +172,63 @@ false ==== ---- QUERY # Testing whether all of persistent Java udfs are accessible. -select java_udfs_test.identity(true); +select identity_anytype(true); ---- TYPES boolean ---- RESULTS true ==== ---- QUERY -select java_udfs_test.identity(cast(10 as tinyint)); +select identity_anytype(cast(10 as tinyint)); ---- TYPES tinyint ---- RESULTS 10 ==== ---- QUERY -select java_udfs_test.identity(cast(10 as smallint)); +select identity_anytype(cast(10 as smallint)); ---- TYPES smallint ---- RESULTS 10 ==== ---- QUERY -select java_udfs_test.identity(cast(10 as int)); +select identity_anytype(cast(10 as int)); ---- TYPES int ---- RESULTS 10 ==== ---- QUERY -select java_udfs_test.identity(cast(10 as bigint)); +select identity_anytype(cast(10 as bigint)); ---- TYPES bigint ---- RESULTS 10 ==== ---- QUERY -select java_udfs_test.identity(cast(10.0 as float)); +select identity_anytype(cast(10.0 as float)); ---- TYPES float ---- RESULTS 10 ==== ---- QUERY -select java_udfs_test.identity(cast(10.0 as double)); +select identity_anytype(cast(10.0 as double)); ---- TYPES double ---- RESULTS 10 ==== ---- QUERY -select java_udfs_test.identity("a", "b"); +select identity_anytype("a", "b"); ---- TYPES string ---- RESULTS 'ab' ==== ---- QUERY -select java_udfs_test.identity("a", "b", "c"); +select identity_anytype("a", "b", "c"); ---- TYPES string ---- RESULTS @@ -238,37 +238,37 @@ string # IMPALA-3378: test many Java UDFs being opened and run concurrently select * from (select max(int_col) from functional.alltypesagg - where udf_test.identity(bool_col) union all + where identity(bool_col) union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(tinyint_col) > 1 union all + where identity(tinyint_col) > 1 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(smallint_col) > 1 union all + where identity(smallint_col) > 1 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(int_col) > 1 union all + where identity(int_col) > 1 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(bigint_col) > 1 union all + where identity(bigint_col) > 1 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(float_col) > 1.0 union all + where identity(float_col) > 1.0 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(double_col) > 1.0 union all + where identity(double_col) > 1.0 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(string_col) > '1' union all + where identity(string_col) > '1' union all (select max(int_col) from functional.alltypesagg - where not udf_test.identity(bool_col) union all + where not identity(bool_col) union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(tinyint_col) > 2 union all + where identity(tinyint_col) > 2 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(smallint_col) > 2 union all + where identity(smallint_col) > 2 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(int_col) > 2 union all + where identity(int_col) > 2 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(bigint_col) > 2 union all + where identity(bigint_col) > 2 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(float_col) > 2.0 union all + where identity(float_col) > 2.0 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(double_col) > 2.0 union all + where identity(double_col) > 2.0 union all (select max(int_col) from functional.alltypesagg - where udf_test.identity(string_col) > '2' + where identity(string_col) > '2' )))))))))))))))) v ---- TYPES INT @@ -301,7 +301,7 @@ values('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), ('stuff'); # Regression test for IMPALA-4266: memory management bugs with output strings from # Java UDFS, exposed by using the UDF as a grouping key in an aggregation. # The UDF replaces "s" with "ss" in the strings. -select distinct udf_test.replace_string(_c0) as es +select distinct replace_string(_c0) as es from replace_string_input order by 1; ---- TYPES
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/libs_with_same_filenames.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/libs_with_same_filenames.test b/testdata/workloads/functional-query/queries/QueryTest/libs_with_same_filenames.test index cb46d9c..64fdced 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/libs_with_same_filenames.test +++ b/testdata/workloads/functional-query/queries/QueryTest/libs_with_same_filenames.test @@ -1,24 +1,21 @@ ==== ---- QUERY -drop function if exists same_lib_filename_udf_test.no_args(); -drop function if exists same_lib_filename_udf_test.no_args2(); - -create function same_lib_filename_udf_test.no_args() returns string +create function no_args() returns string location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' symbol='NoArgs'; -create function same_lib_filename_udf_test.no_args2() returns string +create function no_args2() returns string location '$FILESYSTEM_PREFIX/test-warehouse/udf_test/libTestUdfs.so' symbol='NoArgs'; ---- RESULTS ==== ---- QUERY -select same_lib_filename_udf_test.no_args(); +select no_args(); ---- TYPES string ---- RESULTS 'string' ==== ---- QUERY -select same_lib_filename_udf_test.no_args2(); +select no_args2(); ---- TYPES string ---- RESULTS http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test b/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test index ab0c8bb..5ff4488 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test @@ -1,130 +1,99 @@ ==== ---- QUERY -drop function if exists udf_test.hive_pi(); -drop function if exists udf_test.hive_round(double); -drop function if exists udf_test.hive_floor(double); -drop function if exists udf_test.hive_mod(int, int); -drop function if exists udf_test.hive_bin(bigint); -drop function if exists udf_test.hive_lower(string); - -drop function if exists udf_test.identity(boolean); -drop function if exists udf_test.identity(tinyint); -drop function if exists udf_test.identity(smallint); -drop function if exists udf_test.identity(int); -drop function if exists udf_test.identity(bigint); -drop function if exists udf_test.identity(float); -drop function if exists udf_test.identity(double); -drop function if exists udf_test.identity(string); -drop function if exists udf_test.identity(string, string); -drop function if exists udf_test.identity(string, string, string); -drop function if exists udf_test.identity(timestamp); - -drop function if exists udf_test.hive_add(int, int); -drop function if exists udf_test.hive_add(float, float); -drop function if exists udf_test.hive_add(double, double); -drop function if exists udf_test.hive_add(smallint, smallint); -drop function if exists udf_test.hive_add(boolean, boolean); - -drop function if exists udf_test.throws_exception(); - -drop function if exists java_udfs_test.identity; - -drop function if exists udf_test.replace_string(string); - -create function udf_test.hive_pi() returns double +create function hive_pi() returns double location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'; -create function udf_test.hive_round(double) returns double +create function hive_round(double) returns double location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFRound'; -create function udf_test.hive_floor(double) returns bigint +create function hive_floor(double) returns bigint location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFFloor'; -create function udf_test.hive_mod(int, int) returns int +create function hive_mod(int, int) returns int location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFPosMod'; -create function udf_test.hive_bin(bigint) returns string +create function hive_bin(bigint) returns string location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFBin'; -create function udf_test.hive_lower(string) returns string +create function hive_lower(string) returns string location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFLower'; # Used to test persistent java functions -create function java_udfs_test.identity +create function identity_anytype location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(boolean) returns boolean +create function identity(boolean) returns boolean location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(tinyint) returns tinyint +create function identity(tinyint) returns tinyint location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(smallint) returns smallint +create function identity(smallint) returns smallint location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(int) returns int +create function identity(int) returns int location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(bigint) returns bigint +create function identity(bigint) returns bigint location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(float) returns float +create function identity(float) returns float location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(double) returns double +create function identity(double) returns double location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(string) returns string +create function identity(string) returns string location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(string, string) returns string +create function identity(string, string) returns string location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.identity(string, string, string) returns string +create function identity(string, string, string) returns string location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.hive_add(int, int) returns int +create function hive_add(int, int) returns int location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.hive_add(smallint, smallint) returns smallint +create function hive_add(smallint, smallint) returns smallint location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.hive_add(float, float) returns float +create function hive_add(float, float) returns float location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.hive_add(double, double) returns double +create function hive_add(double, double) returns double location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.hive_add(boolean, boolean) returns boolean +create function hive_add(boolean, boolean) returns boolean location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdf'; -create function udf_test.throws_exception() returns boolean +create function throws_exception() returns boolean location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdfException'; -create function udf_test.replace_string(string) returns string +create function replace_string(string) returns string location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.ReplaceStringUdf'; ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/uda-mem-limit.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/uda-mem-limit.test b/testdata/workloads/functional-query/queries/QueryTest/uda-mem-limit.test index 3a21274..b7c2f5f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/uda-mem-limit.test +++ b/testdata/workloads/functional-query/queries/QueryTest/uda-mem-limit.test @@ -1,8 +1,5 @@ ==== ---- QUERY -create database if not exists native_function_test; -use native_function_test; - drop function if exists agg_memtest(bigint); create aggregate function agg_memtest(bigint) returns bigint http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/uda.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/uda.test b/testdata/workloads/functional-query/queries/QueryTest/uda.test index 21877cf..05e24e7 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/uda.test +++ b/testdata/workloads/functional-query/queries/QueryTest/uda.test @@ -41,3 +41,31 @@ true ---- TYPES boolean ==== +---- QUERY +# Test with even number of input rows. +select toggle_null(id), count(*) +from functional_parquet.alltypesagg +---- RESULTS +NULL,11000 +---- TYPES +int,bigint +==== +---- QUERY +# Test with odd number of input rows. +select toggle_null(id), count(*) +from functional_parquet.alltypesagg +where id <= 9998 +---- RESULTS +1,10999 +---- TYPES +int,bigint +==== +---- QUERY +# Test that input NULLs are passed to aggregate functions ok. +select count_nulls(tinyint_col), count(*) +from functional.alltypesagg +---- RESULTS +2000,11000 +---- TYPES +bigint,bigint +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/udf-codegen-required.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf-codegen-required.test b/testdata/workloads/functional-query/queries/QueryTest/udf-codegen-required.test new file mode 100644 index 0000000..07c93c3 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/udf-codegen-required.test @@ -0,0 +1,10 @@ +==== +---- QUERY +# Codegen is required for > 20 args. +select twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21); +---- TYPES +INT +---- RESULTS +231 +==== + http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test index 948d584..73aee7e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test +++ b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test @@ -1,23 +1,19 @@ ==== ---- QUERY -create database if not exists udf_test_errors; ----- RESULTS -==== ----- QUERY -create function if not exists udf_test_errors.hive_pi() returns double +create function if not exists hive_pi() returns double location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'; ---- RESULTS ==== ---- QUERY -create function if not exists udf_test_errors.foo() returns double +create function if not exists foo() returns double location '$FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so' symbol='FnDoesNotExist'; ---- CATCH Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so ==== ---- QUERY -create function if not exists udf_test_errors.foo() returns double +create function if not exists foo() returns double location '$FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so' symbol='FnDoesNotExist'; ---- CATCH @@ -25,7 +21,7 @@ Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so ==== ---- QUERY # This test is run with codegen disabled. Interpretation only handles up to 20 arguments. -create function if not exists udf_test_errors.twenty_args(int, int, int, int, int, int, +create function if not exists twenty_args(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int) returns int location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' symbol='TwentyArgs'; @@ -33,7 +29,7 @@ symbol='TwentyArgs'; ==== ---- QUERY # Verifies that interpretation can support up to 20 arguments -select udf_test_errors.twenty_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20); +select twenty_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20); ---- TYPES INT ---- RESULTS @@ -41,7 +37,7 @@ INT ==== ---- QUERY # This test is run with codegen disabled. Interpretation only handles up to 20 arguments. -create function if not exists udf_test_errors.twenty_one_args(int, int, int, int, int, int, +create function if not exists twenty_one_args(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int) returns int location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' symbol='TwentyOneArgs'; @@ -49,35 +45,36 @@ symbol='TwentyOneArgs'; ==== ---- QUERY # Verifies that interpretation fails with more than 20 arguments. -select udf_test_errors.twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21); +select twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21); ---- CATCH Cannot interpret native UDF 'twenty_one_args': number of arguments is more than 20. Codegen is needed. Please set DISABLE_CODEGEN to false. ==== ---- QUERY # This test is run with codegen disabled. IR UDF will fail. -create function if not exists udf_test_errors.nine_args_ir(int, int, int, int, int, int, +create function if not exists nine_args_ir(int, int, int, int, int, int, int, int, int) returns int location '$FILESYSTEM_PREFIX/test-warehouse/test-udfs.ll' symbol='NineArgs'; ---- RESULTS ==== ---- QUERY -select udf_test_errors.nine_args_ir(1,2,3,4,5,6,7,8,9); +select nine_args_ir(1,2,3,4,5,6,7,8,9); ---- CATCH Cannot interpret LLVM IR UDF 'nine_args_ir': Codegen is needed. Please set DISABLE_CODEGEN to false. ==== ---- QUERY -drop database udf_test_errors; +use default; +drop database $DATABASE; ---- CATCH -Cannot drop non-empty database: udf_test_errors +Cannot drop non-empty database: ==== ---- QUERY -drop function udf_test_errors.hive_pi(); -drop function udf_test_errors.twenty_args(int, int, int, int, int, int, int, int, +use $DATABASE; +drop function hive_pi(); +drop function twenty_args(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); -drop function udf_test_errors.twenty_one_args(int, int, int, int, int, int, int, int, +drop function twenty_one_args(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); -drop function udf_test_errors.nine_args_ir(int, int, int, int, int, int, int, int, int); -drop database udf_test_errors; +drop function nine_args_ir(int, int, int, int, int, int, int, int, int); ---- RESULTS ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/udf-mem-limit.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf-mem-limit.test b/testdata/workloads/functional-query/queries/QueryTest/udf-mem-limit.test index f9b8d4b..3f23aeb 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/udf-mem-limit.test +++ b/testdata/workloads/functional-query/queries/QueryTest/udf-mem-limit.test @@ -1,10 +1,5 @@ ==== ---- QUERY -create database if not exists native_function_test; -use native_function_test; - -drop function if exists memtest(bigint); - create function memtest(bigint) returns bigint location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so' symbol='MemTest' prepare_fn='MemTestPrepare' close_fn='MemTestClose'; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/testdata/workloads/functional-query/queries/QueryTest/udf.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf.test b/testdata/workloads/functional-query/queries/QueryTest/udf.test index 5cbbecb..ef777a1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/udf.test +++ b/testdata/workloads/functional-query/queries/QueryTest/udf.test @@ -536,10 +536,3 @@ INT ---- RESULTS 210 ==== ----- QUERY -select twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21); ----- TYPES -INT ----- RESULTS -231 -==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/tests/common/test_result_verifier.py ---------------------------------------------------------------------- diff --git a/tests/common/test_result_verifier.py b/tests/common/test_result_verifier.py index 6bf991e..80daf09 100644 --- a/tests/common/test_result_verifier.py +++ b/tests/common/test_result_verifier.py @@ -482,3 +482,25 @@ def verify_runtime_profile(expected, actual): assert len(unmatched_lines) == 0, ("Did not find matches for lines in runtime profile:" "\nEXPECTED LINES:\n%s\n\nACTUAL PROFILE:\n%s" % ('\n'.join(unmatched_lines), actual)) + +def get_node_exec_options(profile_string, exec_node_id): + """ Return a list with all of the ExecOption strings for the given exec node id. """ + results = [] + matched_node = False + id_string = "(id={0})".format(exec_node_id) + for line in profile_string.splitlines(): + if matched_node and line.strip().startswith("ExecOption:"): + results.append(line.strip()) + matched_node = False + if id_string in line: + # Check for the ExecOption string on the next line. + matched_node = True + return results + +def assert_codegen_enabled(profile_string, exec_node_ids): + """ Check that codegen is enabled for the given exec node ids by parsing the text + runtime profile in 'profile_string'""" + for exec_node_id in exec_node_ids: + for exec_options in get_node_exec_options(profile_string, exec_node_id): + assert 'Codegen Enabled' in exec_options + assert not 'Codegen Disabled' in exec_options http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/tests/query_test/test_aggregation.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_aggregation.py b/tests/query_test/test_aggregation.py index 82d50c8..c44c784 100644 --- a/tests/query_test/test_aggregation.py +++ b/tests/query_test/test_aggregation.py @@ -19,28 +19,64 @@ # import pytest +from tests.common.environ import USING_OLD_AGGS_JOINS from tests.common.impala_test_suite import ImpalaTestSuite +from tests.common.skip import SkipIfOldAggsJoins from tests.common.test_dimensions import ( create_exec_option_dimension, create_uncompressed_text_dimension) +from tests.common.test_result_verifier import assert_codegen_enabled from tests.common.test_vector import TestDimension -from tests.common.skip import SkipIfOldAggsJoins - -agg_functions = ['sum', 'count', 'min', 'max', 'avg'] -data_types = ['int', 'bool', 'double', 'bigint', 'tinyint', - 'smallint', 'float', 'timestamp'] +# Test dimensions for TestAggregation. +AGG_FUNCTIONS = ['sum', 'count', 'min', 'max', 'avg', 'ndv'] +DATA_TYPES = ['int', 'bool', 'double', 'bigint', 'tinyint', + 'smallint', 'float', 'timestamp', 'string'] +# Lookup table for TestAggregation results. result_lut = { - # TODO: Add verification for other types 'sum-tinyint': 45000, 'avg-tinyint': 5, 'count-tinyint': 9000, - 'min-tinyint': 1, 'max-tinyint': 9, + 'min-tinyint': 1, 'max-tinyint': 9, 'ndv-tinyint': 9, 'sum-smallint': 495000, 'avg-smallint': 50, 'count-smallint': 9900, - 'min-smallint': 1, 'max-smallint': 99, + 'min-smallint': 1, 'max-smallint': 99, 'ndv-smallint': 99, 'sum-int': 4995000, 'avg-int': 500, 'count-int': 9990, - 'min-int': 1, 'max-int': 999, + 'min-int': 1, 'max-int': 999, 'ndv-int': 999, 'sum-bigint': 49950000, 'avg-bigint': 5000, 'count-bigint': 9990, - 'min-bigint': 10, 'max-bigint': 9990, + 'min-bigint': 10, 'max-bigint' : 9990, 'ndv-bigint': 999, + 'sum-bool': 5000, 'count-bool': 10000, 'min-bool': 'false', + 'max-bool': 'true', 'avg-bool': 0.5, 'ndv-bool': 2, + 'sum-double': 50449500.0, 'count-double': 9990, 'min-double': 10.1, + 'max-double': 10089.9, 'avg-double': 5050.0, 'ndv-double': 999, + 'sum-float': 5494500.0, 'count-float': 9990, 'min-float': 1.10, + 'max-float': 1098.9, 'avg-float': 550.0, 'ndv-float': 999, + 'count-timestamp': 10000, 'min-timestamp': '2010-01-01 00:00:00', + 'max-timestamp': '2010-01-10 18:02:05.100000000', + 'avg-timestamp': '2010-01-05 20:47:11.705080000', 'ndv-timestamp': 10000, + 'count-string': 10000, 'min-string': '0', 'max-string': '999', 'ndv-string': 999, + 'sum-distinct-tinyint': 45, 'count-distinct-tinyint': 9, 'min-distinct-tinyint': 1, + 'max-distinct-tinyint': 9, 'avg-distinct-tinyint': 5, 'ndv-distinct-tinyint': 9, + 'sum-distinct-smallint': 4950, 'count-distinct-smallint': 99, + 'min-distinct-smallint': 1, 'max-distinct-smallint': 99, + 'avg-distinct-smallint': 50, 'ndv-distinct-smallint': 99, + 'sum-distinct-int': 499500, 'count-distinct-int': 999, 'min-distinct-int': 1, + 'max-distinct-int': 999, 'avg-distinct-int': 500, 'ndv-distinct-int': 999, + 'sum-distinct-bigint': 4995000, 'count-distinct-bigint': 999, 'min-distinct-bigint': 10, + 'max-distinct-bigint': 9990, 'avg-distinct-bigint': 5000, + 'ndv-distinct-bigint': 999, + 'sum-distinct-bool': 1, 'count-distinct-bool': 2, 'min-distinct-bool': 'false', + 'max-distinct-bool': 'true', 'avg-distinct-bool': 0.5, 'ndv-distinct-bool': 2, + 'sum-distinct-double': 5044950.0, 'count-distinct-double': 999, + 'min-distinct-double': 10.1, 'max-distinct-double': 10089.9, + 'avg-distinct-double': 5050.0, 'ndv-distinct-double': 999, + 'sum-distinct-float': 549450.0, 'count-distinct-float': 999, 'min-distinct-float': 1.1, + 'max-distinct-float': 1098.9, 'avg-distinct-float': 550.0, + 'ndv-distinct-float': 999, + 'count-distinct-timestamp': 10000, 'min-distinct-timestamp': '2010-01-01 00:00:00', + 'max-distinct-timestamp': '2010-01-10 18:02:05.100000000', + 'avg-distinct-timestamp': '2010-01-05 20:47:11.705080000', + 'ndv-distinct-timestamp': 10000, + 'count-distinct-string': 1000, 'min-distinct-string': '0', + 'max-distinct-string': '999', 'ndv-distinct-string': 999, } class TestAggregation(ImpalaTestSuite): @@ -53,8 +89,8 @@ class TestAggregation(ImpalaTestSuite): super(TestAggregation, cls).add_test_dimensions() # Add two more dimensions - cls.TestMatrix.add_dimension(TestDimension('agg_func', *agg_functions)) - cls.TestMatrix.add_dimension(TestDimension('data_type', *data_types)) + cls.TestMatrix.add_dimension(TestDimension('agg_func', *AGG_FUNCTIONS)) + cls.TestMatrix.add_dimension(TestDimension('data_type', *DATA_TYPES)) cls.TestMatrix.add_constraint(lambda v: cls.is_valid_vector(v)) @classmethod @@ -68,30 +104,63 @@ class TestAggregation(ImpalaTestSuite): if vector.get_value('exec_option')['batch_size'] != 0: return False # Avro doesn't have timestamp type + non_numeric = data_type in ['bool', 'string'] if file_format == 'avro' and data_type == 'timestamp': return False - elif agg_func not in ['min', 'max', 'count'] and data_type == 'bool': + elif non_numeric and agg_func not in ['min', 'max', 'count', 'ndv']: return False elif agg_func == 'sum' and data_type == 'timestamp': return False return True def test_aggregation(self, vector): + exec_option = vector.get_value('exec_option') + disable_codegen = exec_option['disable_codegen'] + # The old aggregation node does not support codegen for all aggregate functions. + check_codegen_enabled = not disable_codegen and not USING_OLD_AGGS_JOINS data_type, agg_func = (vector.get_value('data_type'), vector.get_value('agg_func')) + query = 'select %s(%s_col) from alltypesagg where day is not null' % (agg_func, data_type) - result = self.execute_scalar(query, vector.get_value('exec_option'), - table_format=vector.get_value('table_format')) - if 'int' in data_type: - assert result_lut['%s-%s' % (agg_func, data_type)] == int(result) - - # AVG - if vector.get_value('data_type') == 'timestamp' and\ - vector.get_value('agg_func') == 'avg': - return + result = self.execute_query(query, exec_option, + table_format=vector.get_value('table_format')) + assert len(result.data) == 1 + self.verify_agg_result(agg_func, data_type, False, result.data[0]); + + if check_codegen_enabled: + # Verify codegen was enabled for both stages of the aggregation. + assert_codegen_enabled(result.runtime_profile, [1, 3]) + query = 'select %s(DISTINCT(%s_col)) from alltypesagg where day is not null' % ( agg_func, data_type) - result = self.execute_scalar(query, vector.get_value('exec_option')) + result = self.execute_query(query, vector.get_value('exec_option')) + assert len(result.data) == 1 + self.verify_agg_result(agg_func, data_type, True, result.data[0]); + + if check_codegen_enabled: + # Verify codegen was enabled for all stages of the aggregation. + assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6]) + + def verify_agg_result(self, agg_func, data_type, distinct, actual_string): + key = '%s-%s%s' % (agg_func, 'distinct-' if distinct else '', data_type) + + if agg_func == 'ndv': + # NDV is inherently approximate. Compare with some tolerance. + err = abs(result_lut[key] - int(actual_string)) + rel_err = err / float(result_lut[key]) + print key, result_lut[key], actual_string,abs(result_lut[key] - int(actual_string)) + assert err <= 1 or rel_err < 0.05 + elif data_type in ('float', 'double') and agg_func != 'count': + # Compare with a margin of error. + delta = 1e6 if data_type == 'double' else 1e3 + assert abs(result_lut[key] - float(actual_string)) < delta + elif data_type == 'timestamp' and agg_func != 'count': + # Strip off everything past 10s of microseconds. + ignore_digits = 4 + assert result_lut[key][:-ignore_digits] == actual_string[:-ignore_digits] + else: + assert str(result_lut[key]) == actual_string + class TestAggregationQueries(ImpalaTestSuite): """Run the aggregation test suite, with codegen enabled and disabled, to exercise our @@ -140,6 +209,7 @@ class TestAggregationQueries(ImpalaTestSuite): first phase is running on multiple nodes). Need to pull the result apart and compare the actual items)""" exec_option = vector.get_value('exec_option') + disable_codegen = exec_option['disable_codegen'] table_format = vector.get_value('table_format') # Test group_concat distinct with other aggregate function and groupings. # expected result is the row: 2010,'1, 2, 3, 4','1-2-3-4','2|3|1|4',40,4 @@ -156,6 +226,10 @@ class TestAggregationQueries(ImpalaTestSuite): assert(set(row[i].split(delimiter[i-1])) == set(['1', '2', '3', '4'])) assert(row[4] == '40') assert(row[5] == '4') + check_codegen_enabled = not disable_codegen and not USING_OLD_AGGS_JOINS + if check_codegen_enabled: + # Verify codegen was enabled for all three stages of the aggregation. + assert_codegen_enabled(result.runtime_profile, [1, 2, 4]) # Test group_concat distinct with arrow delimiter, with multiple rows query = """select day, group_concat(distinct string_col, "->") @@ -185,6 +259,10 @@ class TestAggregationQueries(ImpalaTestSuite): where int_col < 10""" result = self.execute_query(query, exec_option, table_format=table_format) assert(set((result.data)[0].split(" ")) == set(['1','2','3','4','5','6','7','8','9'])) + if check_codegen_enabled: + # Verify codegen was enabled for all four stages of the aggregation. + assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6]) + class TestTPCHAggregationQueries(ImpalaTestSuite): # Uses the TPC-H dataset in order to have larger aggregations. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d7246d64/tests/query_test/test_udfs.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py index 0497cbd..5e9fdd9 100644 --- a/tests/query_test/test_udfs.py +++ b/tests/query_test/test_udfs.py @@ -24,7 +24,7 @@ from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIfLocal from tests.common.test_dimensions import ( - create_single_exec_option_dimension, + create_exec_option_dimension, create_uncompressed_text_dimension) from tests.util.calculation_util import get_random_id from tests.util.filesystem_utils import get_fs_path, IS_S3 @@ -37,48 +37,46 @@ class TestUdfs(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestUdfs, cls).add_test_dimensions() - # Without limiting the test suite to a single exec option, the tests will fail - # because the same test case may be executed in parallel with different exec option - # values leading to conflicting DDL ops. - cls.TestMatrix.add_dimension(create_single_exec_option_dimension()) + cls.TestMatrix.add_dimension( + create_exec_option_dimension(disable_codegen_options=[False, True])) # There is no reason to run these tests using all dimensions. cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload())) - def test_native_functions(self, vector): - database = 'native_function_test' - + def test_native_functions(self, vector, unique_database): self.__load_functions( - self.create_udfs_template, vector, database, + self.create_udfs_template, vector, unique_database, get_fs_path('/test-warehouse/libTestUdfs.so')) self.__load_functions( - self.create_sample_udas_template, vector, database, + self.create_sample_udas_template, vector, unique_database, get_fs_path('/test-warehouse/libudasample.so')) self.__load_functions( - self.create_test_udas_template, vector, database, + self.create_test_udas_template, vector, unique_database, get_fs_path('/test-warehouse/libTestUdas.so')) - self.run_test_case('QueryTest/udf', vector, use_db=database) - if not IS_S3: # S3 doesn't support INSERT - self.run_test_case('QueryTest/udf-init-close', vector, use_db=database) - self.run_test_case('QueryTest/uda', vector, use_db=database) + self.run_test_case('QueryTest/udf', vector, use_db=unique_database) + if not vector.get_value('exec_option')['disable_codegen']: + self.run_test_case('QueryTest/udf-codegen-required', vector, use_db=unique_database) + self.run_test_case('QueryTest/udf-init-close', vector, use_db=unique_database) + self.run_test_case('QueryTest/uda', vector, use_db=unique_database) - def test_ir_functions(self, vector): - database = 'ir_function_test' + def test_ir_functions(self, vector, unique_database): + if vector.get_value('exec_option')['disable_codegen']: + # IR functions require codegen to be enabled. + return self.__load_functions( - self.create_udfs_template, vector, database, + self.create_udfs_template, vector, unique_database, get_fs_path('/test-warehouse/test-udfs.ll')) - self.run_test_case('QueryTest/udf', vector, use_db=database) - if not IS_S3: # S3 doesn't support INSERT - self.run_test_case('QueryTest/udf-init-close', vector, use_db=database) + self.run_test_case('QueryTest/udf', vector, use_db=unique_database) + self.run_test_case('QueryTest/udf-init-close', vector, use_db=unique_database) - def test_udf_errors(self, vector): + def test_udf_errors(self, vector, unique_database): # Disable codegen to force interpretation path to be taken. # Aim to exercise two failure cases: # 1. too many arguments # 2. IR UDF vector.get_value('exec_option')['disable_codegen'] = 1 - self.run_test_case('QueryTest/udf-errors', vector) + self.run_test_case('QueryTest/udf-errors', vector, use_db=unique_database) def test_udf_invalid_symbol(self, vector): """ IMPALA-1642: Impala crashes if the symbol for a Hive UDF doesn't exist @@ -98,17 +96,9 @@ class TestUdfs(ImpalaTestSuite): finally: self.client.execute(drop_fn_stmt) - def test_java_udfs(self, vector): - self.client.execute("create database if not exists java_udfs_test " - "location '%s'" % get_fs_path('/test-warehouse/java_udf_test.db')) - self.client.execute("create database if not exists udf_test " - "location '%s'" % get_fs_path('/test-warehouse/udf_test.db')) - try: - self.run_test_case('QueryTest/load-java-udfs', vector) - self.run_test_case('QueryTest/java-udf', vector) - finally: - self.client.execute("drop database if exists java_udfs_test cascade") - self.client.execute("drop database if exists udf_test cascade") + def test_java_udfs(self, vector, unique_database): + self.run_test_case('QueryTest/load-java-udfs', vector, use_db=unique_database) + self.run_test_case('QueryTest/java-udf', vector, use_db=unique_database) @SkipIfLocal.multiple_impalad def test_hive_udfs_missing_jar(self, vector, unique_database): @@ -150,13 +140,8 @@ class TestUdfs(ImpalaTestSuite): except ImpalaBeeswaxException, e: assert "Failed to get file info" in str(e) - def test_libs_with_same_filenames(self, vector): - self.client.execute("create database if not exists same_lib_filename_udf_test " - "location '%s'" % get_fs_path('/test-warehouse/same_lib_filename_udf_test.db')) - try: - self.run_test_case('QueryTest/libs_with_same_filenames', vector) - finally: - self.client.execute("drop database if exists same_lib_filename_udf_test cascade") + def test_libs_with_same_filenames(self, vector, unique_database): + self.run_test_case('QueryTest/libs_with_same_filenames', vector, use_db=unique_database) def test_udf_update_via_drop(self, vector, unique_database): """Test updating the UDF binary without restarting Impala. Dropping @@ -263,19 +248,19 @@ class TestUdfs(ImpalaTestSuite): # Run serially because this will blow the process limit, potentially causing other # queries to fail @pytest.mark.execute_serially - def test_mem_limits(self, vector): + def test_mem_limits(self, vector, unique_database): # Set the mem limit high enough that a simple scan can run mem_limit = 1024 * 1024 vector.get_value('exec_option')['mem_limit'] = mem_limit try: - self.run_test_case('QueryTest/udf-mem-limit', vector) + self.run_test_case('QueryTest/udf-mem-limit', vector, use_db=unique_database) assert False, "Query was expected to fail" except ImpalaBeeswaxException, e: self.__check_exception(e) try: - self.run_test_case('QueryTest/uda-mem-limit', vector) + self.run_test_case('QueryTest/uda-mem-limit', vector, use_db=unique_database) assert False, "Query was expected to fail" except ImpalaBeeswaxException, e: self.__check_exception(e) @@ -330,6 +315,8 @@ returns decimal(9,2) location '{location}' update_fn='SumSmallDecimalUpdate'; create_test_udas_template = """ drop function if exists {database}.trunc_sum(double); drop function if exists {database}.arg_is_const(int, int); +drop function if exists {database}.toggle_null(int); +drop function if exists {database}.count_nulls(bigint); create database if not exists {database}; @@ -341,6 +328,14 @@ serialize_fn='TruncSumSerialize' finalize_fn='TruncSumFinalize'; create aggregate function {database}.arg_is_const(int, int) returns boolean location '{location}' init_fn='ArgIsConstInit' update_fn='ArgIsConstUpdate' merge_fn='ArgIsConstMerge'; + +create aggregate function {database}.toggle_null(int) +returns int location '{location}' +update_fn='ToggleNullUpdate' merge_fn='ToggleNullMerge'; + +create aggregate function {database}.count_nulls(bigint) +returns bigint location '{location}' +update_fn='CountNullsUpdate' merge_fn='CountNullsMerge'; """ # Create test UDF functions in {database} from library {location}
