This is an automated email from the ASF dual-hosted git repository.

mgreber pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 89ebb94e4a1b79557b3d3e8482edcd0352bf6968
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Oct 30 12:43:37 2024 -0700

    KUDU-3623 do not crash on invalid input for 'kudu table copy'
    
    This patch adds code to handle invalid input for the --predicates flag
    of the 'kudu table copy' tool.  Now, instead of crashing on any
    non-expected input, the tool exists gracefully and reports an actionable
    error message.  With that, now it's easier to understand what was wrong,
    correct typos/mistakes, and re-run the tool again, eventually achieving
    the desired goal.
    
    A new test is added to cover the newly introduced functionality.
    
    This is a follow-up to 0afeddf9e530762e0e47beb7428982763715c746.
    
    Change-Id: I96ee4c38f3074b44ce89ac1b18784058e9148daf
    Reviewed-on: http://gerrit.cloudera.org:8080/22009
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Zoltan Chovan <[email protected]>
    Reviewed-by: Marton Greber <[email protected]>
---
 src/kudu/tools/kudu-tool-test.cc | 109 +++++++++++++++++
 src/kudu/tools/table_scanner.cc  | 254 ++++++++++++++++++++++++++-------------
 2 files changed, 278 insertions(+), 85 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 32522559f..f3a7e6556 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -6037,6 +6037,115 @@ TEST_F(ToolTest, TestScanTableMultiPredicates) {
   ASSERT_LE(lines.size(), mid);
 }
 
+TEST_F(ToolTest, InvalidPredicates) {
+  constexpr const char* const kTableName = "mistyped.predicates";
+
+  // The container of pairs below represents test sub-scenarios, where the 
first
+  // element of the pair is the predicate as serialized JSON array, and the
+  // second element is the substring to match the error message.
+  const vector<pair<string, string>> scenarios_info = {
+    {
+      R"*([">","key","a"])*",
+      R"*(Invalid argument: >: only 'AND' is supported as predicate operator)*"
+    },
+    {
+      R"*([[">","key","a"]])*",
+      R"*(Invalid argument: [">","key","a"]: predicate name must be a string)*"
+    },
+    {
+      R"*(["AND",">","key","a"])*",
+      R"*(Invalid argument: ">": expected JSON array for predicates)*"
+    },
+    {
+      R"*(["AND",["="]])*",
+      R"*(Invalid argument: ["="]: malformed predicate)*"
+    },
+    {
+      R"*(["AND",["=","key",100,200]])*",
+      R"*(Invalid argument: ["=","key",100,200]: malformed predicate)*"
+    },
+    {
+      R"*(["AND",["=","key",{}]])*",
+      R"*(Invalid argument: {}: expected value of type 'int')*"
+    },
+    {
+      R"*(["AND",[100,"key",">"]])*",
+      R"*(Invalid argument: 100: predicate name must be a string)*"
+    },
+    {
+      R"*(["AND",["key",100,">"]])*",
+      R"*(Invalid argument: 100: column name must be a string)*"
+    },
+    {
+      R"*(["AND",[">","key","a"]])*",
+      R"*(Invalid argument: "a": expected value of type 'int')*"
+    },
+    {
+      R"*(["AND",[">","key",null]])*",
+      R"*(Invalid argument: null: expected value of type 'int')*"
+    },
+    {
+      R"*(["AND",["=","key","100"]])*",
+      R"*(Invalid argument: "100": expected value of type 'int')*"
+    },
+    {
+      R"*(["AND",[">","string_val",100500]])*",
+      R"*(Invalid argument: 100500: expected value of type 'string')*"
+    },
+    {
+      R"*(["AND",["=","key"]])*",
+      R"*(Invalid argument: missing value for range/equality predicate)*"
+    },
+    {
+      R"*(["AND",["IN","key"]])*",
+      R"*(Invalid argument: missing value for IN (in-list) predicate)*"
+    },
+    {
+      R"*(["AND",["IN","key",100]])*",
+      R"*(100: expecting an array for IN (in-list) predicate values)*"
+    },
+    {
+      R"*(["AND",["IN","key",{}]])*",
+      R"*({}: expecting an array for IN (in-list) predicate values)*"
+    },
+    {
+      R"*(["AND",["<","string_val"]])*",
+      R"*(Invalid argument: missing value for range/equality predicate)*"
+    },
+    {
+      R"*(["AND",["NULL","string_val",0]])*",
+      R"*(Invalid argument: '0': unexpected value for NULL/NOT NULL 
predicate)*"
+    },
+    {
+      R"*(["AND",["==","key","100"]])*",
+      R"*(Invalid argument: '==': unsupported predicate)*"
+    },
+  };
+
+  NO_FATALS(StartExternalMiniCluster());
+
+  // Create the test table.
+  TestWorkload ww(cluster_.get());
+  ww.set_table_name(kTableName);
+  // Just a single tablet server is running in this scenario.
+  ww.set_num_replicas(1);
+  ww.Setup();
+
+  const auto& master_rpc_addr = 
cluster_->master()->bound_rpc_addr().ToString();
+  for (const auto& [predicate_str, expected_err_substring] : scenarios_info) {
+    SCOPED_TRACE(predicate_str);
+    string stderr;
+    const auto s = RunActionStderrString(
+        Substitute("table scan $0 $1 --columns=key,string_val -predicates=$2",
+                   master_rpc_addr,
+                   kTableName,
+                   predicate_str),
+        &stderr);
+    ASSERT_TRUE(s.IsRuntimeError());
+    ASSERT_STR_CONTAINS(stderr, expected_err_substring);
+  }
+}
+
 TEST_F(ToolTest, TableScanRowCountOnly) {
   constexpr const char* const kTableName = "kudu.table.scan.row_count_only";
   // Be specific about the number of threads even if it matches the default
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index 46e293c91..4ca1077db 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -55,6 +55,7 @@
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/bitmap.h"
+#include "kudu/util/easy_json.h"
 #include "kudu/util/jsonreader.h"
 #include "kudu/util/logging.h"
 #include "kudu/util/memory/arena.h"
@@ -242,93 +243,149 @@ PredicateType ParsePredicateType(const string& 
predicate_type) {
     return PredicateType::IsNotNull;
   } else if (predicate_type_uc == "IN") {
     return PredicateType::InList;
-  } else {
-    LOG(FATAL) << Substitute("unhandled predicate type $0", predicate_type);
-    return PredicateType::None;
   }
+  return PredicateType::None;
 }
 
-KuduValue* ParseValue(KuduColumnSchema::DataType type,
-                      const rapidjson::Value* value) {
-  CHECK(value != nullptr);
+Status ParseValue(const rapidjson::Value& value,
+                  KuduColumnSchema::DataType type,
+                  unique_ptr<KuduValue>* out) {
+  DCHECK(out);
+
   switch (type) {
     case KuduColumnSchema::DataType::INT8:
     case KuduColumnSchema::DataType::INT16:
     case KuduColumnSchema::DataType::INT32:
-      CHECK(value->IsInt());
-      return KuduValue::FromInt(value->GetInt());
+      if (!value.IsInt()) {
+        return Status::InvalidArgument(Substitute(
+            "$0: expected value of type 'int'", EasyJson::ToString(value)));
+      }
+      out->reset(KuduValue::FromInt(value.GetInt()));
+      break;
     case KuduColumnSchema::DataType::INT64:
-      CHECK(value->IsInt64());
-      return KuduValue::FromInt(value->GetInt64());
+      if (!value.IsInt64()) {
+        return Status::InvalidArgument(Substitute(
+            "$0: expected value of type 'int64'", EasyJson::ToString(value)));
+      }
+      out->reset(KuduValue::FromInt(value.GetInt64()));
+      break;
     case KuduColumnSchema::DataType::STRING:
-      CHECK(value->IsString());
-      return KuduValue::CopyString(value->GetString());
+      if (!value.IsString()) {
+        return Status::InvalidArgument(Substitute(
+            "$0: expected value of type 'string'", EasyJson::ToString(value)));
+      }
+      out->reset(KuduValue::CopyString(value.GetString()));
+      break;
     case KuduColumnSchema::DataType::BOOL:
-      CHECK(value->IsBool());
-      return KuduValue::FromBool(value->GetBool());
+      if (!value.IsBool()) {
+        return Status::InvalidArgument(Substitute(
+            "$0: expected value of type 'bool'", EasyJson::ToString(value)));
+      }
+      out->reset(KuduValue::FromBool(value.GetBool()));
+      break;
     case KuduColumnSchema::DataType::FLOAT:
-      CHECK(value->IsDouble());
-      return KuduValue::FromFloat(static_cast<float>(value->GetDouble()));
+      if (!value.IsFloat()) {
+        return Status::InvalidArgument(Substitute(
+            "$0: expected value of type 'float'", EasyJson::ToString(value)));
+      }
+      out->reset(KuduValue::FromFloat(value.GetFloat()));
+      break;
     case KuduColumnSchema::DataType::DOUBLE:
-      CHECK(value->IsDouble());
-      return KuduValue::FromDouble(value->GetDouble());
-    default:
-      LOG(FATAL) << Substitute("unhandled data type $0", type);
+      if (!value.IsDouble()) {
+        return Status::InvalidArgument(Substitute(
+            "$0: expected value of type 'double'", EasyJson::ToString(value)));
+      }
+      out->reset(KuduValue::FromDouble(value.GetDouble()));
+      break;
+    default: {
+      auto s = Status::NotSupported(Substitute("unsupported column type $0", 
type));
+      LOG(DFATAL) << s.ToString();
+      return s;
+    }
   }
 
-  return nullptr;
+  return Status::OK();
 }
 
-KuduPredicate* NewComparisonPredicate(const client::sp::shared_ptr<KuduTable>& 
table,
-                                      KuduColumnSchema::DataType type,
-                                      const string& predicate_type,
-                                      const string& column_name,
-                                      const rapidjson::Value* value) {
-  KuduValue* kudu_value = ParseValue(type, value);
-  CHECK(kudu_value != nullptr);
+Status NewComparisonPredicate(const client::sp::shared_ptr<KuduTable>& table,
+                              const string& column_name,
+                              KuduColumnSchema::DataType column_type,
+                              const string& comparison_op_str,
+                              const rapidjson::Value& value,
+                              unique_ptr<KuduPredicate>* out) {
+  DCHECK(out);
+  unique_ptr<KuduValue> kudu_value;
+  RETURN_NOT_OK(ParseValue(value, column_type, &kudu_value));
   KuduPredicate::ComparisonOp cop;
-  if (predicate_type == "<") {
+  if (comparison_op_str == "<") {
     cop = KuduPredicate::ComparisonOp::LESS;
-  } else if (predicate_type == "<=") {
+  } else if (comparison_op_str == "<=") {
     cop = KuduPredicate::ComparisonOp::LESS_EQUAL;
-  } else if (predicate_type == "=") {
+  } else if (comparison_op_str == "=") {
     cop = KuduPredicate::ComparisonOp::EQUAL;
-  } else if (predicate_type == ">") {
+  } else if (comparison_op_str == ">") {
     cop = KuduPredicate::ComparisonOp::GREATER;
-  } else if (predicate_type == ">=") {
+  } else if (comparison_op_str == ">=") {
     cop = KuduPredicate::ComparisonOp::GREATER_EQUAL;
   } else {
-    return nullptr;
+    return Status::NotSupported(Substitute("'$0': unsupported comparison 
operator",
+                                           comparison_op_str));
   }
-  return table->NewComparisonPredicate(column_name, cop, kudu_value);
+
+  out->reset(table->NewComparisonPredicate(column_name, cop, 
kudu_value.release()));
+  return Status::OK();
 }
 
-KuduPredicate* NewIsNullPredicate(const client::sp::shared_ptr<KuduTable>& 
table,
-                                  const string& column_name,
-                                  PredicateType pt) {
+Status NewIsNullPredicate(const client::sp::shared_ptr<KuduTable>& table,
+                          const string& column_name,
+                          PredicateType pt,
+                          unique_ptr<KuduPredicate>* out) {
+  DCHECK(out);
   switch (pt) {
     case PredicateType::IsNotNull:
-      return table->NewIsNotNullPredicate(column_name);
+      out->reset(table->NewIsNotNullPredicate(column_name));
+      break;
     case PredicateType::IsNull:
-      return table->NewIsNullPredicate(column_name);
+      out->reset(table->NewIsNullPredicate(column_name));
+      break;
     default:
-      return nullptr;
+      DCHECK(false);
+      return Status::NotSupported(
+          Substitute("$0: unsupported nullability predicate", 
static_cast<uint16_t>(pt)));
   }
+  return Status::OK();
 }
 
-KuduPredicate* NewInListPredicate(const client::sp::shared_ptr<KuduTable>& 
table,
-                                  KuduColumnSchema::DataType type,
-                                  const string& name,
-                                  const JsonReader& reader,
-                                  const rapidjson::Value *object) {
-  CHECK(object->IsArray());
+Status NewInListPredicate(const client::sp::shared_ptr<KuduTable>& table,
+                          const string& column_name,
+                          KuduColumnSchema::DataType column_type,
+                          const rapidjson::Value& object,
+                          const JsonReader& reader,
+                          unique_ptr<KuduPredicate>* out) {
+  if (!object.IsArray()) {
+    return Status::InvalidArgument(Substitute(
+        "$0: expecting an array for IN (in-list) predicate values",
+        EasyJson::ToString(object)));
+  }
   vector<const rapidjson::Value*> values;
-  reader.ExtractObjectArray(object, nullptr, &values);
-  vector<KuduValue *> kudu_values;
-  for (const auto& value : values) {
-    kudu_values.emplace_back(ParseValue(type, value));
+  reader.ExtractObjectArray(&object, nullptr, &values);
+  // Using vector of auto-pointers to avoid memory leakage if ParseValue()
+  // returns non-OK status.
+  vector<unique_ptr<KuduValue>> kudu_values;
+  for (const auto* v : values) {
+    unique_ptr<KuduValue> kudu_value;
+    RETURN_NOT_OK(ParseValue(*v, column_type, &kudu_value));
+    kudu_values.emplace_back(std::move(kudu_value));
   }
-  return table->NewInListPredicate(name, &kudu_values);
+
+  vector<KuduValue*> kudu_values_raw;
+  kudu_values_raw.reserve(kudu_values.size());
+  for (auto& v : kudu_values) {
+    kudu_values_raw.emplace_back(v.release());
+  }
+  out->reset(table->NewInListPredicate(column_name, &kudu_values_raw));
+
+  return Status::OK();
 }
 
 Status AddPredicate(const client::sp::shared_ptr<KuduTable>& table,
@@ -341,35 +398,46 @@ Status AddPredicate(const 
client::sp::shared_ptr<KuduTable>& table,
     return Status::OK();
   }
 
-  Schema schema_internal = KuduSchema::ToSchema(table->schema());
-  int idx = schema_internal.find_column(column_name);
-  if (PREDICT_FALSE(idx == Schema::kColumnNotFound)) {
+  const Schema schema_internal = KuduSchema::ToSchema(table->schema());
+  const int idx = schema_internal.find_column(column_name);
+  if (idx == Schema::kColumnNotFound) {
     return Status::NotFound("no such column", column_name);
   }
-  auto type = table->schema().Column(static_cast<size_t>(idx)).type();
-  KuduPredicate* predicate = nullptr;
-  PredicateType pt = ParsePredicateType(predicate_type);
+  const auto column_type = 
table->schema().Column(static_cast<size_t>(idx)).type();
+  const PredicateType pt = ParsePredicateType(predicate_type);
+  unique_ptr<KuduPredicate> predicate;
   switch (pt) {
     case PredicateType::Equality:
     case PredicateType::Range:
-      CHECK(value);
-      predicate = NewComparisonPredicate(table, type, predicate_type, 
column_name, *value);
+      if (!value.has_value()) {
+        return Status::InvalidArgument("missing value for range/equality 
predicate");
+      }
+      RETURN_NOT_OK(NewComparisonPredicate(
+          table, column_name, column_type, predicate_type, *value.value(), 
&predicate));
       break;
     case PredicateType::IsNotNull:
     case PredicateType::IsNull:
-      CHECK(!value);
-      predicate = NewIsNullPredicate(table, column_name, pt);
+      if (value.has_value()) {
+        return Status::InvalidArgument(Substitute(
+            "'$0': unexpected value for NULL/NOT NULL predicate",
+            EasyJson::ToString(*value.value())));
+      }
+      RETURN_NOT_OK(NewIsNullPredicate(table, column_name, pt, &predicate));
       break;
     case PredicateType::InList: {
-      CHECK(value);
-      predicate = NewInListPredicate(table, type, column_name, reader, *value);
+      if (!value.has_value()) {
+        return Status::InvalidArgument("missing value for IN (in-list) 
predicate");
+      }
+      RETURN_NOT_OK(NewInListPredicate(
+          table, column_name, column_type, *value.value(), reader, 
&predicate));
       break;
     }
     default:
-      return Status::NotSupported(Substitute("not support predicate_type $0", 
predicate_type));
+      return Status::InvalidArgument(
+          Substitute("'$0': unsupported predicate", predicate_type));
   }
-  CHECK(predicate);
-  RETURN_NOT_OK(builder->AddConjunctPredicate(predicate));
+  DCHECK(predicate);
+  RETURN_NOT_OK(builder->AddConjunctPredicate(predicate.release()));
 
   return Status::OK();
 }
@@ -385,35 +453,51 @@ Status AddPredicates(const 
client::sp::shared_ptr<KuduTable>& table,
   RETURN_NOT_OK(reader.ExtractObjectArray(reader.root(),
                                           nullptr,
                                           &predicate_objects));
+  static constexpr const char* const kFmtErrPredicateName =
+      "$0: predicate name must be a string";
   vector<unique_ptr<KuduPredicate>> predicates;
-  for (int i = 0; i < predicate_objects.size(); ++i) {
+  for (size_t i = 0; i < predicate_objects.size(); ++i) {
+    const auto* obj = predicate_objects[i];
     if (i == 0) {
-      CHECK(predicate_objects[i]->IsString());
+      if (!obj->IsString()) {
+        return Status::InvalidArgument(Substitute(
+            kFmtErrPredicateName, EasyJson::ToString(*obj)));
+      }
       string op;
-      ToUpperCase(predicate_objects[i]->GetString(), &op);
+      ToUpperCase(obj->GetString(), &op);
       if (op != "AND") {
-        return Status::InvalidArgument(Substitute("only 'AND' operator is 
supported now"));
+        return Status::InvalidArgument(Substitute(
+            "$0: only 'AND' is supported as predicate operator", op));
       }
       continue;
     }
 
-    CHECK(predicate_objects[i]->IsArray());
+    if (!obj->IsArray()) {
+      return Status::InvalidArgument(Substitute(
+          "$0: expected JSON array for predicates", EasyJson::ToString(*obj)));
+    }
     vector<const rapidjson::Value*> elements;
-    reader.ExtractObjectArray(predicate_objects[i], nullptr, &elements);
-    if (elements.size() == 2 || elements.size() == 3) {
-      CHECK(elements[0]->IsString());
-      CHECK(elements[1]->IsString());
-      RETURN_NOT_OK(AddPredicate(table,
-          elements[0]->GetString(),
-          elements[1]->GetString(),
-          elements.size() == 2 ? nullopt
-                               : optional<const 
rapidjson::Value*>(elements[2]),
-          reader,
-          builder));
-    } else {
+    reader.ExtractObjectArray(obj, nullptr, &elements);
+    if (elements.size() != 2 && elements.size() != 3) {
       return Status::InvalidArgument(
-          Substitute("invalid predicate elements count $0", elements.size()));
+          Substitute("$0: malformed predicate", EasyJson::ToString(*obj)));
+    }
+    if (const auto* elem = elements[0]; !elem->IsString()) {
+      return Status::InvalidArgument(Substitute(
+          kFmtErrPredicateName, EasyJson::ToString(*elem)));
+    }
+    if (const auto* elem = elements[1]; !elem->IsString()) {
+      return Status::InvalidArgument(Substitute(
+          "$0: column name must be a string", EasyJson::ToString(*elem)));
     }
+    RETURN_NOT_OK(AddPredicate(
+        table,
+        elements[0]->GetString(),
+        elements[1]->GetString(),
+        elements.size() == 2 ? nullopt
+                             : optional<const rapidjson::Value*>(elements[2]),
+        reader,
+        builder));
   }
 
   return Status::OK();

Reply via email to