Repository: incubator-impala Updated Branches: refs/heads/master c189d0a39 -> f03900a80
IMPALA-5425: Add test for validating input when setting query options This patch adds multiple query option validation testcases to be/src/service/query-options-test.cc The test cases include parsing edge cases, bondary values, special cases for some options and some testcases moved from testdata/workloads/functional-query/queries/QueryTest/set.test This patch also fixes a bug generating wrong error message for query option RUNTIME_FILTER_WAIT_TIME_MS. Change-Id: I510e02bb0776673d8cbfc22b903831882c6908d7 Reviewed-on: http://gerrit.cloudera.org:8080/7805 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/87065638 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/87065638 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/87065638 Branch: refs/heads/master Commit: 87065638f04d0830d359b5d145f62c8cb44014d3 Parents: c189d0a Author: Tianyi Wang <[email protected]> Authored: Fri Aug 25 15:13:57 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue Oct 10 21:54:33 2017 +0000 ---------------------------------------------------------------------- be/src/service/query-options-test.cc | 331 +++++++++++++++---- be/src/service/query-options.cc | 2 +- .../functional-query/queries/QueryTest/set.test | 58 ---- 3 files changed, 265 insertions(+), 126 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87065638/be/src/service/query-options-test.cc ---------------------------------------------------------------------- diff --git a/be/src/service/query-options-test.cc b/be/src/service/query-options-test.cc index 75b1567..e617ef2 100644 --- a/be/src/service/query-options-test.cc +++ b/be/src/service/query-options-test.cc @@ -17,10 +17,8 @@ #include "service/query-options.h" -#include <boost/lexical_cast.hpp> -#include <inttypes.h> -#include <string> -#include <gutil/strings/substitute.h> +#include <boost/preprocessor/seq/for_each.hpp> +#include <boost/preprocessor/tuple/to_seq.hpp> #include "runtime/runtime-filter.h" #include "testutil/gtest-util.h" @@ -29,81 +27,280 @@ using namespace boost; using namespace impala; using namespace std; -using namespace strings; -TEST(QueryOptions, SetBloomSize) { +constexpr int32_t I32_MAX = numeric_limits<int32_t>::max(); +constexpr int64_t I64_MAX = numeric_limits<int64_t>::max(); + +// A option definition is a key string and a pointer to the actual value in an instance of +// TQueryOptions. For example, the OptionDef for prefetch_mode is +// OptionDef{"prefetch_mode", &options.prefetch_mode}. +// The pointer is used to fetch the field and check equality in test functions. +template<typename T> struct OptionDef { + using OptionType = T; + const char* option_name; + T* option_field; +}; + +// Expand MAKE_OPTIONDEF(option) into OptionDef{"option", &options.option} +#define MAKE_OPTIONDEF(name) OptionDef<decltype(options.name)>{#name, &options.name} + +// Range defines the lower/upper bound restriction for byte/integer options. +template<typename T> struct Range { + T lower_bound; + T upper_bound; +}; + +// A ValueDef{str, value} defines that str should be parsed into value. +template<typename T> struct ValueDef { + const char* str; + T value; +}; + +// A EnumCase consists of an OptionDef and a list of valid value definitions. +// For example, the EnumCase for prefetch_mode is +// EnumCase<TPrefetchMode::type>{OptionDef{"prefetch_mode", &options.prefetch_mode}, +// {{"None", TPrefetchMode::None}, {"HT_BUCKET", TPrefetchMode::HT_BUCKET},}} +template<typename T> struct EnumCase { + OptionDef<T> option_def; + vector<ValueDef<T>> value_defs; +}; + +// Create a shortcut function to test if 'value' would be successfully read as expected +// value. +template<typename T> +auto MakeTestOkFn(TQueryOptions& options, OptionDef<T> option_def) { + return [&options, option_def](const char* str, const T& expected_value) { + EXPECT_OK(SetQueryOption(option_def.option_name, str, &options, nullptr)); + EXPECT_EQ(expected_value, *option_def.option_field); + }; +} + +// Create a shortcut function to test if 'value' would result into a failure. +template<typename T> +auto MakeTestErrFn(TQueryOptions& options, OptionDef<T> option_def) { + return [&options, option_def](const char* str) { + EXPECT_FALSE(SetQueryOption(option_def.option_name, str, &options, nullptr).ok()); + }; +} + +TEST(QueryOptions, SetNonExistentOptions) { TQueryOptions options; - vector<pair<string, int*>> option_list = { - {"RUNTIME_BLOOM_FILTER_SIZE", &options.runtime_bloom_filter_size}, - {"RUNTIME_FILTER_MAX_SIZE", &options.runtime_filter_max_size}, - {"RUNTIME_FILTER_MIN_SIZE", &options.runtime_filter_min_size}}; - for (const auto& option: option_list) { - - // The upper and lower bound of the allowed values: - EXPECT_FALSE(SetQueryOption(option.first, - lexical_cast<string>(RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE - 1), &options, - NULL) - .ok()); - - EXPECT_FALSE(SetQueryOption(option.first, - lexical_cast<string>(RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE + 1), &options, - NULL) - .ok()); - - EXPECT_OK(SetQueryOption(option.first, - lexical_cast<string>(RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE), &options, NULL)); - EXPECT_EQ(RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE, *option.second); - - EXPECT_OK(SetQueryOption(option.first, - lexical_cast<string>(RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE), &options, NULL)); - EXPECT_EQ(RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE, *option.second); - - // Parsing memory values works in a reasonable way: - EXPECT_OK(SetQueryOption(option.first, "1MB", &options, NULL)); - EXPECT_EQ(1 << 20, *option.second); - - // Bloom filters cannot occupy a percentage of memory: - EXPECT_FALSE(SetQueryOption(option.first, "10%", &options, NULL).ok()); + EXPECT_FALSE(SetQueryOption("", "bar", &options, nullptr).ok()); + EXPECT_FALSE(SetQueryOption("foo", "bar", &options, nullptr).ok()); +} + +// Test a set of test cases, where a test case is a pair of OptionDef, and a pair of the +// lower and the upper bound. +template<typename T> +void TestByteCaseSet(TQueryOptions& options, + const vector<pair<OptionDef<T>, Range<T>>>& case_set) { + for (const auto& test_case: case_set) { + const OptionDef<T>& option_def = test_case.first; + const Range<T>& range = test_case.second; + auto TestOk = MakeTestOkFn(options, option_def); + auto TestError = MakeTestErrFn(options, option_def); + for (T boundary: {range.lower_bound, range.upper_bound}) { + // Byte options treat -1 as 0, which means infinite. + if (boundary == -1) boundary = 0; + TestOk(to_string(boundary).c_str(), boundary); + TestOk((to_string(boundary) + "B").c_str(), boundary); + } + TestError(to_string(range.lower_bound - 1).c_str()); + TestError(to_string(static_cast<uint64_t>(range.upper_bound) + 1).c_str()); + TestError("1tb"); + TestError("1%"); + TestError("1%B"); + TestError("1B%"); + TestError("Not a number!"); + ValueDef<int64_t> common_values[] = { + {"0", 0}, + {"1 B", 1}, + {"0Kb", 0}, + {"4G", 4ll * 1024 * 1024 * 1024}, + {"-1M", -1024 * 1024} + }; + for (const auto& value_def : common_values) { + if (value_def.value >= range.lower_bound && value_def.value <= range.upper_bound) { + TestOk(value_def.str, value_def.value); + } else { + TestError(value_def.str); + } + } } } -TEST(QueryOptions, SetFilterWait) { +// Test byte size options. Byte size options accept both integers or integers with a +// suffix like "KB". They treat -1 and 0 as infinite. Some of them have a lower bound and +// an upper bound. +TEST(QueryOptions, SetByteOptions) { TQueryOptions options; + // key and its valid range: [(key, (min, max))] + vector<pair<OptionDef<int64_t>, Range<int64_t>>> case_set_i64 { + {MAKE_OPTIONDEF(mem_limit), {-1, I64_MAX}}, + {MAKE_OPTIONDEF(max_scan_range_length), {-1, I64_MAX}}, + {MAKE_OPTIONDEF(rm_initial_mem), {-1, I64_MAX}}, + {MAKE_OPTIONDEF(buffer_pool_limit), {-1, I64_MAX}}, + {MAKE_OPTIONDEF(max_row_size), {1, I64_MAX}}, + {MAKE_OPTIONDEF(parquet_file_size), {-1, I32_MAX}} + }; + vector<pair<OptionDef<int32_t>, Range<int32_t>>> case_set_i32 { + {MAKE_OPTIONDEF(runtime_filter_min_size), + {RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE, + RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE}}, + {MAKE_OPTIONDEF(runtime_filter_max_size), + {RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE, + RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE}}, + {MAKE_OPTIONDEF(runtime_bloom_filter_size), + {RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE, + RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE}} + }; + TestByteCaseSet(options, case_set_i64); + TestByteCaseSet(options, case_set_i32); +} + +// Test an enum test case. It may or may not accept integers. +template<typename T> +void TestEnumCase(TQueryOptions& options, const EnumCase<T>& test_case, + bool accept_integer) { + auto TestOk = MakeTestOkFn(options, test_case.option_def); + auto TestError = MakeTestErrFn(options, test_case.option_def); + TestError("foo"); + TestError("1%"); + TestError("-1"); + // Test max enum value + 1 + TestError(to_string(test_case.value_defs.back().value + 1).c_str()); + for (const auto& value_def: test_case.value_defs) { + TestOk(value_def.str, value_def.value); + string numeric_str = to_string(value_def.value); + if (accept_integer) { + TestOk(numeric_str.c_str(), value_def.value); + } else { + TestError(numeric_str.c_str()); + } + } +} - // The upper and lower bound of the allowed values: - EXPECT_FALSE(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS", "-1", &options, NULL).ok()); +// Test enum options. Some enum options accept integers while others don't. +TEST(QueryOptions, SetEnumOptions) { + // A set of macros expanding to an EnumDef. + // ENTRY(_, TPrefetchMode, None) expands to {"None", TPrefetchMode::None}, +#define ENTRY(_, prefix, entry) {BOOST_PP_STRINGIZE(entry), prefix::entry}, - EXPECT_FALSE(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS", - lexical_cast<string>(static_cast<int64_t>(numeric_limits<int32_t>::max()) + 1), - &options, NULL).ok()); + // ENTRIES(TPrefetchMode, (None)(HT_BUCKET)) expands to + // {"None", TPrefetchMode::None}, {"HT_BUCKET", TPrefetchMode::HT_BUCKET}, +#define ENTRIES(prefix, name) BOOST_PP_SEQ_FOR_EACH(ENTRY, prefix, name) - EXPECT_OK(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS", "0", &options, NULL)); - EXPECT_EQ(0, options.runtime_filter_wait_time_ms); + // CASE(prefetch_mode, TPrefetchMode, (NONE, HT_BUCKET)) expands to: + // EnumCase{OptionDef{"prefetch_mode", &options.prefetch_mode}, + // {{"None", TPrefetchMode::None}, {"HT_BUCKET", TPrefetchMode::HT_BUCKET},}} +#define CASE(key, enumtype, enums) EnumCase<decltype(options.key)> { \ + MAKE_OPTIONDEF(key), {ENTRIES(enumtype, BOOST_PP_TUPLE_TO_SEQ(enums))}} - EXPECT_OK(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS", - lexical_cast<string>(numeric_limits<int32_t>::max()), &options, NULL)); - EXPECT_EQ(numeric_limits<int32_t>::max(), options.runtime_filter_wait_time_ms); + TQueryOptions options; + TestEnumCase(options, CASE(prefetch_mode, TPrefetchMode, (NONE, HT_BUCKET)), true); + TestEnumCase(options, CASE(default_join_distribution_mode, TJoinDistributionMode, + (BROADCAST, SHUFFLE)), true); + TestEnumCase(options, CASE(explain_level, TExplainLevel, + (MINIMAL, STANDARD, EXTENDED, VERBOSE)), true); + TestEnumCase(options, CASE(parquet_fallback_schema_resolution, + TParquetFallbackSchemaResolution, (POSITION, NAME)), true); + TestEnumCase(options, CASE(parquet_array_resolution, TParquetArrayResolution, + (THREE_LEVEL, TWO_LEVEL, TWO_LEVEL_THEN_THREE_LEVEL)), true); + TestEnumCase(options, CASE(seq_compression_mode, THdfsSeqCompressionMode, + (BLOCK, RECORD)), false); + TestEnumCase(options, CASE(compression_codec, THdfsCompression, + (NONE, GZIP, BZIP2, DEFAULT, SNAPPY, SNAPPY_BLOCKED)), false); +#undef CASE +#undef ENTRIES +#undef ENTRY } -TEST(QueryOptions, MaxScanRangeLength) { - vector<pair<string, int64_t>> vals = {{"4GB", 4L * 1024 * 1024 * 1024}, - {"-1M", -1}, - {"0B", 0}, - {"1024", 1024}, - {"9223372036854775807", 9223372036854775807}, - {"9223372036854775808", -1}, // 2**63 - {"Not a number!", -1}}; - for (const auto& val: vals) { - TQueryOptions options; - QueryOptionsMask mask; - Status status = ParseQueryOptions( - Substitute("MAX_SCAN_RANGE_LENGTH=$0", val.first), &options, &mask); - int64_t expected = val.second; - if (expected == -1) { - EXPECT_FALSE(status.ok()); - } else { - EXPECT_OK(status); - EXPECT_EQ(expected, options.max_scan_range_length); +// Test integer options. Some of them have lower/upper bounds. +TEST(QueryOptions, SetIntOptions) { + TQueryOptions options; + // List of pairs of Key and its valid range + pair<OptionDef<int32_t>, Range<int32_t>> case_set[] { + {MAKE_OPTIONDEF(runtime_filter_wait_time_ms), {0, I32_MAX}}, + {MAKE_OPTIONDEF(mt_dop), {0, 64}}, + {MAKE_OPTIONDEF(disable_codegen_rows_threshold), {0, I32_MAX}}, + {MAKE_OPTIONDEF(max_num_runtime_filters), {0, I32_MAX}} + }; + for (const auto& test_case : case_set) { + const OptionDef<int32_t>& option_def = test_case.first; + const Range<int32_t>& range = test_case.second; + auto TestOk = MakeTestOkFn(options, option_def); + auto TestError = MakeTestErrFn(options, option_def); + TestError("1M"); + TestError("0B"); + TestError("1%"); + TestOk(to_string(range.lower_bound).c_str(), range.lower_bound); + TestOk(to_string(range.upper_bound).c_str(), range.upper_bound); + TestError(to_string(int64_t(range.lower_bound) - 1).c_str()); + TestError(to_string(int64_t(range.upper_bound) + 1).c_str()); + } +} + +// Test options with non regular validation rule +TEST(QueryOptions, SetSpecialOptions) { + // REPLICA_PREFERENCE cannot be set to 0 if DISABLE_CACHED_READS is true + // It also has unsettable enum values: cache_rack(1) & disk_rack(3) + TQueryOptions options; + { + OptionDef<TReplicaPreference::type> key_def = MAKE_OPTIONDEF(replica_preference); + auto TestOk = MakeTestOkFn(options, key_def); + auto TestError = MakeTestErrFn(options, key_def); + EXPECT_OK(SetQueryOption("DISABLE_CACHED_READS", "false", &options, nullptr)); + TestOk("cache_local", TReplicaPreference::CACHE_LOCAL); + TestOk("0", TReplicaPreference::CACHE_LOCAL); + EXPECT_OK(SetQueryOption("DISABLE_CACHED_READS", "true", &options, nullptr)); + TestError("cache_local"); + TestError("0"); + TestError("cache_rack"); + TestError("1"); + TestOk("disk_local", TReplicaPreference::DISK_LOCAL); + TestOk("2", TReplicaPreference::DISK_LOCAL); + TestError("disk_rack"); + TestError("3"); + TestOk("remote", TReplicaPreference::REMOTE); + TestOk("4", TReplicaPreference::REMOTE); + TestError("5"); + } + // SCRATCH_LIMIT is a byte option where, unlike other byte options, -1 is not treated as + // 0 + { + OptionDef<int64_t> key_def = MAKE_OPTIONDEF(scratch_limit); + auto TestOk = MakeTestOkFn(options, key_def); + auto TestError = MakeTestErrFn(options, key_def); + TestOk("-1", -1); + TestOk("0", 0); + TestOk("4GB", 4ll * 1024 * 1024 * 1024); + TestError("-1MB"); + TestError("1tb"); + TestError("1%"); + TestError("1%B"); + TestError("1B%"); + TestOk("1 B", 1); + TestError("Not a number!"); + } + // DEFAULT_SPILLABLE_BUFFER_SIZE and MIN_SPILLABLE_BUFFER_SIZE accepts -1, 0 and memory + // value that is power of 2 + { + OptionDef<int64_t> key_def_default = MAKE_OPTIONDEF(default_spillable_buffer_size); + OptionDef<int64_t> key_def_min = MAKE_OPTIONDEF(min_spillable_buffer_size); + for (const auto& key_def : {key_def_min, key_def_default}) { + auto TestOk = MakeTestOkFn(options, key_def); + auto TestError = MakeTestErrFn(options, key_def); + TestOk("-1", 0); + TestOk("-1B", 0); + TestOk("0", 0); + TestOk("0B", 0); + TestOk("1", 1); + TestOk("2", 2); + TestError("3"); + TestOk("1K", 1024); + TestOk("2MB", 2 * 1024 * 1024); + TestOk("32G", 32ll * 1024 * 1024 * 1024); + TestError("10MB"); } } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87065638/be/src/service/query-options.cc ---------------------------------------------------------------------- diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc index d4f1f73..047a8a2 100644 --- a/be/src/service/query-options.cc +++ b/be/src/service/query-options.cc @@ -376,7 +376,7 @@ Status impala::SetQueryOption(const string& key, const string& value, if (result != StringParser::PARSE_SUCCESS || time_ms < 0) { return Status( Substitute("$0 is not a valid wait time. Valid sizes are in [0, $1].", - value, 0, numeric_limits<int32_t>::max())); + value, numeric_limits<int32_t>::max())); } query_options->__set_runtime_filter_wait_time_ms(time_ms); break; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87065638/testdata/workloads/functional-query/queries/QueryTest/set.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test b/testdata/workloads/functional-query/queries/QueryTest/set.test index 364a01e..b4cda70 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/set.test +++ b/testdata/workloads/functional-query/queries/QueryTest/set.test @@ -101,39 +101,6 @@ set; STRING, STRING ==== ---- QUERY -# IMPALA-1906: Test that SET changes PARQUET_FILE_SIZE only if it's less than 2GB. -set parquet_file_size='1.5g'; -set; ----- RESULTS: VERIFY_IS_SUBSET -'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0' -'ABORT_ON_ERROR','0' -'ALLOW_UNSUPPORTED_FORMATS','0' -'BATCH_SIZE','0' -'DEBUG_ACTION','' -'DEFAULT_ORDER_BY_LIMIT','-1' -'DISABLE_CACHED_READS','0' -'DISABLE_CODEGEN','0' -'DISABLE_OUTERMOST_TOPN','0' -'EXPLAIN_LEVEL','1' -'HBASE_CACHE_BLOCKS','0' -'HBASE_CACHING','0' -'MAX_ERRORS','100' -'MAX_IO_BUFFERS','0' -'MAX_SCAN_RANGE_LENGTH','0' -'MEM_LIMIT','0' -'NUM_NODES','0' -'NUM_SCANNER_THREADS','0' -'COMPRESSION_CODEC','' -'PARQUET_FILE_SIZE','1610612736' -'REQUEST_POOL','' -'RESERVATION_REQUEST_TIMEOUT','' -'RM_INITIAL_MEM','0' -'SYNC_DDL','0' -'V_CPU_CORES','' ----- TYPES -STRING, STRING -==== ----- QUERY set parquet_file_size='2g' ---- CATCH The PARQUET_FILE_SIZE query option must be less than 2GB. @@ -263,31 +230,6 @@ explain select count(distinct double_col) from functional.alltypesagg; '00:SCAN HDFS [functional.alltypesagg]' ==== ---- QUERY -# IMPALA-5591: This shouldn't throw an error. -set scratch_limit=-1; -==== ----- QUERY -# Power of two max_row_size. -set max_row_size=8m; -set; ----- RESULTS: VERIFY_IS_SUBSET -'MAX_ROW_SIZE','8388608' ----- TYPES -STRING,STRING -==== ----- QUERY -# Non power of two max_row_size. -set max_row_size=12345; -set; ----- RESULTS: VERIFY_IS_SUBSET -'MAX_ROW_SIZE','12345' ----- TYPES -STRING,STRING -==== ----- QUERY -set max_row_size=8m; -==== ----- QUERY set max_row_size=-1; ---- CATCH Max row size must be a positive number of bytes: -1
