Repository: incubator-impala
Updated Branches:
  refs/heads/master c189d0a39 -> f03900a80


IMPALA-5425: Add test for validating input when setting query options

This patch adds multiple query option validation testcases to
be/src/service/query-options-test.cc
The test cases include parsing edge cases, bondary values, special
cases for some options and some testcases moved from
testdata/workloads/functional-query/queries/QueryTest/set.test
This patch also fixes a bug generating wrong error message for
query option RUNTIME_FILTER_WAIT_TIME_MS.

Change-Id: I510e02bb0776673d8cbfc22b903831882c6908d7
Reviewed-on: http://gerrit.cloudera.org:8080/7805
Reviewed-by: Dan Hecht <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/87065638
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/87065638
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/87065638

Branch: refs/heads/master
Commit: 87065638f04d0830d359b5d145f62c8cb44014d3
Parents: c189d0a
Author: Tianyi Wang <[email protected]>
Authored: Fri Aug 25 15:13:57 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Tue Oct 10 21:54:33 2017 +0000

----------------------------------------------------------------------
 be/src/service/query-options-test.cc            | 331 +++++++++++++++----
 be/src/service/query-options.cc                 |   2 +-
 .../functional-query/queries/QueryTest/set.test |  58 ----
 3 files changed, 265 insertions(+), 126 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87065638/be/src/service/query-options-test.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options-test.cc 
b/be/src/service/query-options-test.cc
index 75b1567..e617ef2 100644
--- a/be/src/service/query-options-test.cc
+++ b/be/src/service/query-options-test.cc
@@ -17,10 +17,8 @@
 
 #include "service/query-options.h"
 
-#include <boost/lexical_cast.hpp>
-#include <inttypes.h>
-#include <string>
-#include <gutil/strings/substitute.h>
+#include <boost/preprocessor/seq/for_each.hpp>
+#include <boost/preprocessor/tuple/to_seq.hpp>
 
 #include "runtime/runtime-filter.h"
 #include "testutil/gtest-util.h"
@@ -29,81 +27,280 @@
 using namespace boost;
 using namespace impala;
 using namespace std;
-using namespace strings;
 
-TEST(QueryOptions, SetBloomSize) {
+constexpr int32_t I32_MAX = numeric_limits<int32_t>::max();
+constexpr int64_t I64_MAX = numeric_limits<int64_t>::max();
+
+// A option definition is a key string and a pointer to the actual value in an 
instance of
+// TQueryOptions. For example, the OptionDef for prefetch_mode is
+// OptionDef{"prefetch_mode", &options.prefetch_mode}.
+// The pointer is used to fetch the field and check equality in test functions.
+template<typename T> struct OptionDef {
+  using OptionType = T;
+  const char* option_name;
+  T* option_field;
+};
+
+// Expand MAKE_OPTIONDEF(option) into OptionDef{"option", &options.option}
+#define MAKE_OPTIONDEF(name) OptionDef<decltype(options.name)>{#name, 
&options.name}
+
+// Range defines the lower/upper bound restriction for byte/integer options.
+template<typename T> struct Range {
+  T lower_bound;
+  T upper_bound;
+};
+
+// A ValueDef{str, value} defines that str should be parsed into value.
+template<typename T> struct ValueDef {
+  const char* str;
+  T value;
+};
+
+// A EnumCase consists of an OptionDef and a list of valid value definitions.
+// For example, the EnumCase for prefetch_mode is
+// EnumCase<TPrefetchMode::type>{OptionDef{"prefetch_mode", 
&options.prefetch_mode},
+//     {{"None", TPrefetchMode::None}, {"HT_BUCKET", 
TPrefetchMode::HT_BUCKET},}}
+template<typename T> struct EnumCase {
+  OptionDef<T> option_def;
+  vector<ValueDef<T>> value_defs;
+};
+
+// Create a shortcut function to test if 'value' would be successfully read as 
expected
+// value.
+template<typename T>
+auto MakeTestOkFn(TQueryOptions& options, OptionDef<T> option_def) {
+  return [&options, option_def](const char* str, const T& expected_value) {
+    EXPECT_OK(SetQueryOption(option_def.option_name, str, &options, nullptr));
+    EXPECT_EQ(expected_value, *option_def.option_field);
+  };
+}
+
+// Create a shortcut function to test if 'value' would result into a failure.
+template<typename T>
+auto MakeTestErrFn(TQueryOptions& options, OptionDef<T> option_def) {
+  return [&options, option_def](const char* str) {
+    EXPECT_FALSE(SetQueryOption(option_def.option_name, str, &options, 
nullptr).ok());
+  };
+}
+
+TEST(QueryOptions, SetNonExistentOptions) {
   TQueryOptions options;
-  vector<pair<string, int*>> option_list = {
-    {"RUNTIME_BLOOM_FILTER_SIZE", &options.runtime_bloom_filter_size},
-    {"RUNTIME_FILTER_MAX_SIZE", &options.runtime_filter_max_size},
-    {"RUNTIME_FILTER_MIN_SIZE", &options.runtime_filter_min_size}};
-  for (const auto& option: option_list) {
-
-    // The upper and lower bound of the allowed values:
-    EXPECT_FALSE(SetQueryOption(option.first,
-        lexical_cast<string>(RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE - 1), 
&options,
-        NULL)
-        .ok());
-
-    EXPECT_FALSE(SetQueryOption(option.first,
-        lexical_cast<string>(RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE + 1), 
&options,
-        NULL)
-        .ok());
-
-    EXPECT_OK(SetQueryOption(option.first,
-        lexical_cast<string>(RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE), 
&options, NULL));
-    EXPECT_EQ(RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE, *option.second);
-
-    EXPECT_OK(SetQueryOption(option.first,
-        lexical_cast<string>(RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE), 
&options, NULL));
-    EXPECT_EQ(RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE, *option.second);
-
-    // Parsing memory values works in a reasonable way:
-    EXPECT_OK(SetQueryOption(option.first, "1MB", &options, NULL));
-    EXPECT_EQ(1 << 20, *option.second);
-
-    // Bloom filters cannot occupy a percentage of memory:
-    EXPECT_FALSE(SetQueryOption(option.first, "10%", &options, NULL).ok());
+  EXPECT_FALSE(SetQueryOption("", "bar", &options, nullptr).ok());
+  EXPECT_FALSE(SetQueryOption("foo", "bar", &options, nullptr).ok());
+}
+
+// Test a set of test cases, where a test case is a pair of OptionDef, and a 
pair of the
+// lower and the upper bound.
+template<typename T>
+void TestByteCaseSet(TQueryOptions& options,
+    const vector<pair<OptionDef<T>, Range<T>>>& case_set) {
+  for (const auto& test_case: case_set) {
+    const OptionDef<T>& option_def = test_case.first;
+    const Range<T>& range = test_case.second;
+    auto TestOk = MakeTestOkFn(options, option_def);
+    auto TestError = MakeTestErrFn(options, option_def);
+    for (T boundary: {range.lower_bound, range.upper_bound}) {
+      // Byte options treat -1 as 0, which means infinite.
+      if (boundary == -1) boundary = 0;
+      TestOk(to_string(boundary).c_str(), boundary);
+      TestOk((to_string(boundary) + "B").c_str(), boundary);
+    }
+    TestError(to_string(range.lower_bound - 1).c_str());
+    TestError(to_string(static_cast<uint64_t>(range.upper_bound) + 1).c_str());
+    TestError("1tb");
+    TestError("1%");
+    TestError("1%B");
+    TestError("1B%");
+    TestError("Not a number!");
+    ValueDef<int64_t> common_values[] = {
+        {"0",   0},
+        {"1 B", 1},
+        {"0Kb", 0},
+        {"4G",  4ll * 1024 * 1024 * 1024},
+        {"-1M", -1024 * 1024}
+    };
+    for (const auto& value_def : common_values) {
+      if (value_def.value >= range.lower_bound && value_def.value <= 
range.upper_bound) {
+        TestOk(value_def.str, value_def.value);
+      } else {
+        TestError(value_def.str);
+      }
+    }
   }
 }
 
-TEST(QueryOptions, SetFilterWait) {
+// Test byte size options. Byte size options accept both integers or integers 
with a
+// suffix like "KB". They treat -1 and 0 as infinite. Some of them have a 
lower bound and
+// an upper bound.
+TEST(QueryOptions, SetByteOptions) {
   TQueryOptions options;
+  // key and its valid range: [(key, (min, max))]
+  vector<pair<OptionDef<int64_t>, Range<int64_t>>> case_set_i64 {
+      {MAKE_OPTIONDEF(mem_limit),             {-1, I64_MAX}},
+      {MAKE_OPTIONDEF(max_scan_range_length), {-1, I64_MAX}},
+      {MAKE_OPTIONDEF(rm_initial_mem),        {-1, I64_MAX}},
+      {MAKE_OPTIONDEF(buffer_pool_limit),     {-1, I64_MAX}},
+      {MAKE_OPTIONDEF(max_row_size),          {1, I64_MAX}},
+      {MAKE_OPTIONDEF(parquet_file_size),     {-1, I32_MAX}}
+  };
+  vector<pair<OptionDef<int32_t>, Range<int32_t>>> case_set_i32 {
+      {MAKE_OPTIONDEF(runtime_filter_min_size),
+          {RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE,
+              RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE}},
+      {MAKE_OPTIONDEF(runtime_filter_max_size),
+          {RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE,
+              RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE}},
+      {MAKE_OPTIONDEF(runtime_bloom_filter_size),
+          {RuntimeFilterBank::MIN_BLOOM_FILTER_SIZE,
+              RuntimeFilterBank::MAX_BLOOM_FILTER_SIZE}}
+  };
+  TestByteCaseSet(options, case_set_i64);
+  TestByteCaseSet(options, case_set_i32);
+}
+
+// Test an enum test case. It may or may not accept integers.
+template<typename T>
+void TestEnumCase(TQueryOptions& options, const EnumCase<T>& test_case,
+    bool accept_integer) {
+  auto TestOk = MakeTestOkFn(options, test_case.option_def);
+  auto TestError = MakeTestErrFn(options, test_case.option_def);
+  TestError("foo");
+  TestError("1%");
+  TestError("-1");
+  // Test max enum value + 1
+  TestError(to_string(test_case.value_defs.back().value + 1).c_str());
+  for (const auto& value_def: test_case.value_defs) {
+    TestOk(value_def.str, value_def.value);
+    string numeric_str = to_string(value_def.value);
+    if (accept_integer) {
+      TestOk(numeric_str.c_str(), value_def.value);
+    } else {
+      TestError(numeric_str.c_str());
+    }
+  }
+}
 
-  // The upper and lower bound of the allowed values:
-  EXPECT_FALSE(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS", "-1", &options, 
NULL).ok());
+// Test enum options. Some enum options accept integers while others don't.
+TEST(QueryOptions, SetEnumOptions) {
+  // A set of macros expanding to an EnumDef.
+  // ENTRY(_, TPrefetchMode, None) expands to {"None", TPrefetchMode::None},
+#define ENTRY(_, prefix, entry) {BOOST_PP_STRINGIZE(entry), prefix::entry},
 
-  EXPECT_FALSE(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS",
-      
lexical_cast<string>(static_cast<int64_t>(numeric_limits<int32_t>::max()) + 1),
-      &options, NULL).ok());
+  // ENTRIES(TPrefetchMode, (None)(HT_BUCKET)) expands to
+  // {"None", TPrefetchMode::None}, {"HT_BUCKET", TPrefetchMode::HT_BUCKET},
+#define ENTRIES(prefix, name) BOOST_PP_SEQ_FOR_EACH(ENTRY, prefix, name)
 
-  EXPECT_OK(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS", "0", &options, 
NULL));
-  EXPECT_EQ(0, options.runtime_filter_wait_time_ms);
+  // CASE(prefetch_mode, TPrefetchMode, (NONE, HT_BUCKET)) expands to:
+  // EnumCase{OptionDef{"prefetch_mode", &options.prefetch_mode},
+  //     {{"None", TPrefetchMode::None}, {"HT_BUCKET", 
TPrefetchMode::HT_BUCKET},}}
+#define CASE(key, enumtype, enums) EnumCase<decltype(options.key)> { \
+    MAKE_OPTIONDEF(key), {ENTRIES(enumtype, BOOST_PP_TUPLE_TO_SEQ(enums))}}
 
-  EXPECT_OK(SetQueryOption("RUNTIME_FILTER_WAIT_TIME_MS",
-      lexical_cast<string>(numeric_limits<int32_t>::max()), &options, NULL));
-  EXPECT_EQ(numeric_limits<int32_t>::max(), 
options.runtime_filter_wait_time_ms);
+  TQueryOptions options;
+  TestEnumCase(options, CASE(prefetch_mode, TPrefetchMode, (NONE, HT_BUCKET)), 
true);
+  TestEnumCase(options, CASE(default_join_distribution_mode, 
TJoinDistributionMode,
+      (BROADCAST, SHUFFLE)), true);
+  TestEnumCase(options, CASE(explain_level, TExplainLevel,
+      (MINIMAL, STANDARD, EXTENDED, VERBOSE)), true);
+  TestEnumCase(options, CASE(parquet_fallback_schema_resolution,
+      TParquetFallbackSchemaResolution, (POSITION, NAME)), true);
+  TestEnumCase(options, CASE(parquet_array_resolution, TParquetArrayResolution,
+      (THREE_LEVEL, TWO_LEVEL, TWO_LEVEL_THEN_THREE_LEVEL)), true);
+  TestEnumCase(options, CASE(seq_compression_mode, THdfsSeqCompressionMode,
+      (BLOCK, RECORD)), false);
+  TestEnumCase(options, CASE(compression_codec, THdfsCompression,
+      (NONE, GZIP, BZIP2, DEFAULT, SNAPPY, SNAPPY_BLOCKED)), false);
+#undef CASE
+#undef ENTRIES
+#undef ENTRY
 }
 
-TEST(QueryOptions, MaxScanRangeLength) {
-  vector<pair<string, int64_t>> vals = {{"4GB", 4L * 1024 * 1024 * 1024},
-                                        {"-1M", -1},
-                                        {"0B", 0},
-                                        {"1024", 1024},
-                                        {"9223372036854775807", 
9223372036854775807},
-                                        {"9223372036854775808", -1}, // 2**63
-                                        {"Not a number!", -1}};
-  for (const auto& val: vals) {
-    TQueryOptions options;
-    QueryOptionsMask mask;
-    Status status = ParseQueryOptions(
-        Substitute("MAX_SCAN_RANGE_LENGTH=$0", val.first), &options, &mask);
-    int64_t expected = val.second;
-    if (expected == -1) {
-      EXPECT_FALSE(status.ok());
-    } else {
-      EXPECT_OK(status);
-      EXPECT_EQ(expected, options.max_scan_range_length);
+// Test integer options. Some of them have lower/upper bounds.
+TEST(QueryOptions, SetIntOptions) {
+  TQueryOptions options;
+  // List of pairs of Key and its valid range
+  pair<OptionDef<int32_t>, Range<int32_t>> case_set[] {
+      {MAKE_OPTIONDEF(runtime_filter_wait_time_ms),    {0, I32_MAX}},
+      {MAKE_OPTIONDEF(mt_dop),                         {0, 64}},
+      {MAKE_OPTIONDEF(disable_codegen_rows_threshold), {0, I32_MAX}},
+      {MAKE_OPTIONDEF(max_num_runtime_filters),        {0, I32_MAX}}
+  };
+  for (const auto& test_case : case_set) {
+    const OptionDef<int32_t>& option_def = test_case.first;
+    const Range<int32_t>& range = test_case.second;
+    auto TestOk = MakeTestOkFn(options, option_def);
+    auto TestError = MakeTestErrFn(options, option_def);
+    TestError("1M");
+    TestError("0B");
+    TestError("1%");
+    TestOk(to_string(range.lower_bound).c_str(), range.lower_bound);
+    TestOk(to_string(range.upper_bound).c_str(), range.upper_bound);
+    TestError(to_string(int64_t(range.lower_bound) - 1).c_str());
+    TestError(to_string(int64_t(range.upper_bound) + 1).c_str());
+  }
+}
+
+// Test options with non regular validation rule
+TEST(QueryOptions, SetSpecialOptions) {
+  // REPLICA_PREFERENCE cannot be set to 0 if DISABLE_CACHED_READS is true
+  // It also has unsettable enum values: cache_rack(1) & disk_rack(3)
+  TQueryOptions options;
+  {
+    OptionDef<TReplicaPreference::type> key_def = 
MAKE_OPTIONDEF(replica_preference);
+    auto TestOk = MakeTestOkFn(options, key_def);
+    auto TestError = MakeTestErrFn(options, key_def);
+    EXPECT_OK(SetQueryOption("DISABLE_CACHED_READS", "false", &options, 
nullptr));
+    TestOk("cache_local", TReplicaPreference::CACHE_LOCAL);
+    TestOk("0", TReplicaPreference::CACHE_LOCAL);
+    EXPECT_OK(SetQueryOption("DISABLE_CACHED_READS", "true", &options, 
nullptr));
+    TestError("cache_local");
+    TestError("0");
+    TestError("cache_rack");
+    TestError("1");
+    TestOk("disk_local", TReplicaPreference::DISK_LOCAL);
+    TestOk("2", TReplicaPreference::DISK_LOCAL);
+    TestError("disk_rack");
+    TestError("3");
+    TestOk("remote", TReplicaPreference::REMOTE);
+    TestOk("4", TReplicaPreference::REMOTE);
+    TestError("5");
+  }
+  // SCRATCH_LIMIT is a byte option where, unlike other byte options, -1 is 
not treated as
+  // 0
+  {
+    OptionDef<int64_t> key_def = MAKE_OPTIONDEF(scratch_limit);
+    auto TestOk = MakeTestOkFn(options, key_def);
+    auto TestError = MakeTestErrFn(options, key_def);
+    TestOk("-1", -1);
+    TestOk("0", 0);
+    TestOk("4GB", 4ll * 1024 * 1024 * 1024);
+    TestError("-1MB");
+    TestError("1tb");
+    TestError("1%");
+    TestError("1%B");
+    TestError("1B%");
+    TestOk("1 B", 1);
+    TestError("Not a number!");
+  }
+  // DEFAULT_SPILLABLE_BUFFER_SIZE and MIN_SPILLABLE_BUFFER_SIZE accepts -1, 0 
and memory
+  // value that is power of 2
+  {
+    OptionDef<int64_t> key_def_default = 
MAKE_OPTIONDEF(default_spillable_buffer_size);
+    OptionDef<int64_t> key_def_min = MAKE_OPTIONDEF(min_spillable_buffer_size);
+    for (const auto& key_def : {key_def_min, key_def_default}) {
+      auto TestOk = MakeTestOkFn(options, key_def);
+      auto TestError = MakeTestErrFn(options, key_def);
+      TestOk("-1", 0);
+      TestOk("-1B", 0);
+      TestOk("0", 0);
+      TestOk("0B", 0);
+      TestOk("1", 1);
+      TestOk("2", 2);
+      TestError("3");
+      TestOk("1K", 1024);
+      TestOk("2MB", 2 * 1024 * 1024);
+      TestOk("32G", 32ll * 1024 * 1024 * 1024);
+      TestError("10MB");
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87065638/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index d4f1f73..047a8a2 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -376,7 +376,7 @@ Status impala::SetQueryOption(const string& key, const 
string& value,
         if (result != StringParser::PARSE_SUCCESS || time_ms < 0) {
           return Status(
               Substitute("$0 is not a valid wait time. Valid sizes are in [0, 
$1].",
-                  value, 0, numeric_limits<int32_t>::max()));
+                  value, numeric_limits<int32_t>::max()));
         }
         query_options->__set_runtime_filter_wait_time_ms(time_ms);
         break;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/87065638/testdata/workloads/functional-query/queries/QueryTest/set.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test 
b/testdata/workloads/functional-query/queries/QueryTest/set.test
index 364a01e..b4cda70 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/set.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/set.test
@@ -101,39 +101,6 @@ set;
 STRING, STRING
 ====
 ---- QUERY
-# IMPALA-1906: Test that SET changes PARQUET_FILE_SIZE only if it's less than 
2GB.
-set parquet_file_size='1.5g';
-set;
----- RESULTS: VERIFY_IS_SUBSET
-'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
-'ABORT_ON_ERROR','0'
-'ALLOW_UNSUPPORTED_FORMATS','0'
-'BATCH_SIZE','0'
-'DEBUG_ACTION',''
-'DEFAULT_ORDER_BY_LIMIT','-1'
-'DISABLE_CACHED_READS','0'
-'DISABLE_CODEGEN','0'
-'DISABLE_OUTERMOST_TOPN','0'
-'EXPLAIN_LEVEL','1'
-'HBASE_CACHE_BLOCKS','0'
-'HBASE_CACHING','0'
-'MAX_ERRORS','100'
-'MAX_IO_BUFFERS','0'
-'MAX_SCAN_RANGE_LENGTH','0'
-'MEM_LIMIT','0'
-'NUM_NODES','0'
-'NUM_SCANNER_THREADS','0'
-'COMPRESSION_CODEC',''
-'PARQUET_FILE_SIZE','1610612736'
-'REQUEST_POOL',''
-'RESERVATION_REQUEST_TIMEOUT',''
-'RM_INITIAL_MEM','0'
-'SYNC_DDL','0'
-'V_CPU_CORES',''
----- TYPES
-STRING, STRING
-====
----- QUERY
 set parquet_file_size='2g'
 ---- CATCH
 The PARQUET_FILE_SIZE query option must be less than 2GB.
@@ -263,31 +230,6 @@ explain select count(distinct double_col) from 
functional.alltypesagg;
 '00:SCAN HDFS [functional.alltypesagg]'
 ====
 ---- QUERY
-# IMPALA-5591: This shouldn't throw an error.
-set scratch_limit=-1;
-====
----- QUERY
-# Power of two max_row_size.
-set max_row_size=8m;
-set;
----- RESULTS: VERIFY_IS_SUBSET
-'MAX_ROW_SIZE','8388608'
----- TYPES
-STRING,STRING
-====
----- QUERY
-# Non power of two max_row_size.
-set max_row_size=12345;
-set;
----- RESULTS: VERIFY_IS_SUBSET
-'MAX_ROW_SIZE','12345'
----- TYPES
-STRING,STRING
-====
----- QUERY
-set max_row_size=8m;
-====
----- QUERY
 set max_row_size=-1;
 ---- CATCH
 Max row size must be a positive number of bytes: -1

Reply via email to