This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch branch-1.17.x
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/branch-1.17.x by this push:
new f80f6e9f5 KUDU-3623 do not crash on invalid input for 'kudu table copy'
f80f6e9f5 is described below
commit f80f6e9f5c28cb67148c3e6b1a439e2b2a4dd8ab
Author: Alexey Serbin <[email protected]>
AuthorDate: Wed Oct 30 12:43:37 2024 -0700
KUDU-3623 do not crash on invalid input for 'kudu table copy'
This patch adds code to handle invalid input for the --predicates flag
of the 'kudu table copy' tool. Now, instead of crashing on any
non-expected input, the tool exists gracefully and reports an actionable
error message. With that, now it's easier to understand what was wrong,
correct typos/mistakes, and re-run the tool again, eventually achieving
the desired goal.
A new test is added to cover the newly introduced functionality.
This is a follow-up to 0afeddf9e530762e0e47beb7428982763715c746.
Change-Id: I96ee4c38f3074b44ce89ac1b18784058e9148daf
Reviewed-on: http://gerrit.cloudera.org:8080/22009
Tested-by: Alexey Serbin <[email protected]>
Reviewed-by: Zoltan Chovan <[email protected]>
Reviewed-by: Marton Greber <[email protected]>
(cherry picked from commit 89ebb94e4a1b79557b3d3e8482edcd0352bf6968)
Reviewed-on: http://gerrit.cloudera.org:8080/22019
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/tools/kudu-tool-test.cc | 109 +++++++++++++++++
src/kudu/tools/table_scanner.cc | 254 ++++++++++++++++++++++++++-------------
2 files changed, 278 insertions(+), 85 deletions(-)
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index f1b7ff674..1e1c938c1 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -5613,6 +5613,115 @@ TEST_F(ToolTest, TestScanTableMultiPredicates) {
ASSERT_LE(lines.size(), mid);
}
+TEST_F(ToolTest, InvalidPredicates) {
+ constexpr const char* const kTableName = "mistyped.predicates";
+
+ // The container of pairs below represents test sub-scenarios, where the
first
+ // element of the pair is the predicate as serialized JSON array, and the
+ // second element is the substring to match the error message.
+ const vector<pair<string, string>> scenarios_info = {
+ {
+ R"*([">","key","a"])*",
+ R"*(Invalid argument: >: only 'AND' is supported as predicate operator)*"
+ },
+ {
+ R"*([[">","key","a"]])*",
+ R"*(Invalid argument: [">","key","a"]: predicate name must be a string)*"
+ },
+ {
+ R"*(["AND",">","key","a"])*",
+ R"*(Invalid argument: ">": expected JSON array for predicates)*"
+ },
+ {
+ R"*(["AND",["="]])*",
+ R"*(Invalid argument: ["="]: malformed predicate)*"
+ },
+ {
+ R"*(["AND",["=","key",100,200]])*",
+ R"*(Invalid argument: ["=","key",100,200]: malformed predicate)*"
+ },
+ {
+ R"*(["AND",["=","key",{}]])*",
+ R"*(Invalid argument: {}: expected value of type 'int')*"
+ },
+ {
+ R"*(["AND",[100,"key",">"]])*",
+ R"*(Invalid argument: 100: predicate name must be a string)*"
+ },
+ {
+ R"*(["AND",["key",100,">"]])*",
+ R"*(Invalid argument: 100: column name must be a string)*"
+ },
+ {
+ R"*(["AND",[">","key","a"]])*",
+ R"*(Invalid argument: "a": expected value of type 'int')*"
+ },
+ {
+ R"*(["AND",[">","key",null]])*",
+ R"*(Invalid argument: null: expected value of type 'int')*"
+ },
+ {
+ R"*(["AND",["=","key","100"]])*",
+ R"*(Invalid argument: "100": expected value of type 'int')*"
+ },
+ {
+ R"*(["AND",[">","string_val",100500]])*",
+ R"*(Invalid argument: 100500: expected value of type 'string')*"
+ },
+ {
+ R"*(["AND",["=","key"]])*",
+ R"*(Invalid argument: missing value for range/equality predicate)*"
+ },
+ {
+ R"*(["AND",["IN","key"]])*",
+ R"*(Invalid argument: missing value for IN (in-list) predicate)*"
+ },
+ {
+ R"*(["AND",["IN","key",100]])*",
+ R"*(100: expecting an array for IN (in-list) predicate values)*"
+ },
+ {
+ R"*(["AND",["IN","key",{}]])*",
+ R"*({}: expecting an array for IN (in-list) predicate values)*"
+ },
+ {
+ R"*(["AND",["<","string_val"]])*",
+ R"*(Invalid argument: missing value for range/equality predicate)*"
+ },
+ {
+ R"*(["AND",["NULL","string_val",0]])*",
+ R"*(Invalid argument: '0': unexpected value for NULL/NOT NULL
predicate)*"
+ },
+ {
+ R"*(["AND",["==","key","100"]])*",
+ R"*(Invalid argument: '==': unsupported predicate)*"
+ },
+ };
+
+ NO_FATALS(StartExternalMiniCluster());
+
+ // Create the test table.
+ TestWorkload ww(cluster_.get());
+ ww.set_table_name(kTableName);
+ // Just a single tablet server is running in this scenario.
+ ww.set_num_replicas(1);
+ ww.Setup();
+
+ const auto& master_rpc_addr =
cluster_->master()->bound_rpc_addr().ToString();
+ for (const auto& [predicate_str, expected_err_substring] : scenarios_info) {
+ SCOPED_TRACE(predicate_str);
+ string stderr;
+ const auto s = RunActionStderrString(
+ Substitute("table scan $0 $1 --columns=key,string_val -predicates=$2",
+ master_rpc_addr,
+ kTableName,
+ predicate_str),
+ &stderr);
+ ASSERT_TRUE(s.IsRuntimeError());
+ ASSERT_STR_CONTAINS(stderr, expected_err_substring);
+ }
+}
+
TEST_F(ToolTest, TableScanRowCountOnly) {
constexpr const char* const kTableName = "kudu.table.scan.row_count_only";
// Be specific about the number of threads even if it matches the default
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index 35b87f70f..a2ae126f5 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -54,6 +54,7 @@
#include "kudu/gutil/strings/split.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/util/bitmap.h"
+#include "kudu/util/easy_json.h"
#include "kudu/util/jsonreader.h"
#include "kudu/util/logging.h"
#include "kudu/util/memory/arena.h"
@@ -223,93 +224,149 @@ PredicateType ParsePredicateType(const string&
predicate_type) {
return PredicateType::IsNotNull;
} else if (predicate_type_uc == "IN") {
return PredicateType::InList;
- } else {
- LOG(FATAL) << Substitute("unhandled predicate type $0", predicate_type);
- return PredicateType::None;
}
+ return PredicateType::None;
}
-KuduValue* ParseValue(KuduColumnSchema::DataType type,
- const rapidjson::Value* value) {
- CHECK(value != nullptr);
+Status ParseValue(const rapidjson::Value& value,
+ KuduColumnSchema::DataType type,
+ unique_ptr<KuduValue>* out) {
+ DCHECK(out);
+
switch (type) {
case KuduColumnSchema::DataType::INT8:
case KuduColumnSchema::DataType::INT16:
case KuduColumnSchema::DataType::INT32:
- CHECK(value->IsInt());
- return KuduValue::FromInt(value->GetInt());
+ if (!value.IsInt()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expected value of type 'int'", EasyJson::ToString(value)));
+ }
+ out->reset(KuduValue::FromInt(value.GetInt()));
+ break;
case KuduColumnSchema::DataType::INT64:
- CHECK(value->IsInt64());
- return KuduValue::FromInt(value->GetInt64());
+ if (!value.IsInt64()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expected value of type 'int64'", EasyJson::ToString(value)));
+ }
+ out->reset(KuduValue::FromInt(value.GetInt64()));
+ break;
case KuduColumnSchema::DataType::STRING:
- CHECK(value->IsString());
- return KuduValue::CopyString(value->GetString());
+ if (!value.IsString()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expected value of type 'string'", EasyJson::ToString(value)));
+ }
+ out->reset(KuduValue::CopyString(value.GetString()));
+ break;
case KuduColumnSchema::DataType::BOOL:
- CHECK(value->IsBool());
- return KuduValue::FromBool(value->GetBool());
+ if (!value.IsBool()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expected value of type 'bool'", EasyJson::ToString(value)));
+ }
+ out->reset(KuduValue::FromBool(value.GetBool()));
+ break;
case KuduColumnSchema::DataType::FLOAT:
- CHECK(value->IsDouble());
- return KuduValue::FromFloat(static_cast<float>(value->GetDouble()));
+ if (!value.IsFloat()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expected value of type 'float'", EasyJson::ToString(value)));
+ }
+ out->reset(KuduValue::FromFloat(value.GetFloat()));
+ break;
case KuduColumnSchema::DataType::DOUBLE:
- CHECK(value->IsDouble());
- return KuduValue::FromDouble(value->GetDouble());
- default:
- LOG(FATAL) << Substitute("unhandled data type $0", type);
+ if (!value.IsDouble()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expected value of type 'double'", EasyJson::ToString(value)));
+ }
+ out->reset(KuduValue::FromDouble(value.GetDouble()));
+ break;
+ default: {
+ auto s = Status::NotSupported(Substitute("unsupported column type $0",
type));
+ LOG(DFATAL) << s.ToString();
+ return s;
+ }
}
- return nullptr;
+ return Status::OK();
}
-KuduPredicate* NewComparisonPredicate(const client::sp::shared_ptr<KuduTable>&
table,
- KuduColumnSchema::DataType type,
- const string& predicate_type,
- const string& column_name,
- const rapidjson::Value* value) {
- KuduValue* kudu_value = ParseValue(type, value);
- CHECK(kudu_value != nullptr);
+Status NewComparisonPredicate(const client::sp::shared_ptr<KuduTable>& table,
+ const string& column_name,
+ KuduColumnSchema::DataType column_type,
+ const string& comparison_op_str,
+ const rapidjson::Value& value,
+ unique_ptr<KuduPredicate>* out) {
+ DCHECK(out);
+ unique_ptr<KuduValue> kudu_value;
+ RETURN_NOT_OK(ParseValue(value, column_type, &kudu_value));
KuduPredicate::ComparisonOp cop;
- if (predicate_type == "<") {
+ if (comparison_op_str == "<") {
cop = KuduPredicate::ComparisonOp::LESS;
- } else if (predicate_type == "<=") {
+ } else if (comparison_op_str == "<=") {
cop = KuduPredicate::ComparisonOp::LESS_EQUAL;
- } else if (predicate_type == "=") {
+ } else if (comparison_op_str == "=") {
cop = KuduPredicate::ComparisonOp::EQUAL;
- } else if (predicate_type == ">") {
+ } else if (comparison_op_str == ">") {
cop = KuduPredicate::ComparisonOp::GREATER;
- } else if (predicate_type == ">=") {
+ } else if (comparison_op_str == ">=") {
cop = KuduPredicate::ComparisonOp::GREATER_EQUAL;
} else {
- return nullptr;
+ return Status::NotSupported(Substitute("'$0': unsupported comparison
operator",
+ comparison_op_str));
}
- return table->NewComparisonPredicate(column_name, cop, kudu_value);
+
+ out->reset(table->NewComparisonPredicate(column_name, cop,
kudu_value.release()));
+ return Status::OK();
}
-KuduPredicate* NewIsNullPredicate(const client::sp::shared_ptr<KuduTable>&
table,
- const string& column_name,
- PredicateType pt) {
+Status NewIsNullPredicate(const client::sp::shared_ptr<KuduTable>& table,
+ const string& column_name,
+ PredicateType pt,
+ unique_ptr<KuduPredicate>* out) {
+ DCHECK(out);
switch (pt) {
case PredicateType::IsNotNull:
- return table->NewIsNotNullPredicate(column_name);
+ out->reset(table->NewIsNotNullPredicate(column_name));
+ break;
case PredicateType::IsNull:
- return table->NewIsNullPredicate(column_name);
+ out->reset(table->NewIsNullPredicate(column_name));
+ break;
default:
- return nullptr;
+ DCHECK(false);
+ return Status::NotSupported(
+ Substitute("$0: unsupported nullability predicate",
static_cast<uint16_t>(pt)));
}
+ return Status::OK();
}
-KuduPredicate* NewInListPredicate(const client::sp::shared_ptr<KuduTable>&
table,
- KuduColumnSchema::DataType type,
- const string& name,
- const JsonReader& reader,
- const rapidjson::Value *object) {
- CHECK(object->IsArray());
+Status NewInListPredicate(const client::sp::shared_ptr<KuduTable>& table,
+ const string& column_name,
+ KuduColumnSchema::DataType column_type,
+ const rapidjson::Value& object,
+ const JsonReader& reader,
+ unique_ptr<KuduPredicate>* out) {
+ if (!object.IsArray()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expecting an array for IN (in-list) predicate values",
+ EasyJson::ToString(object)));
+ }
vector<const rapidjson::Value*> values;
- reader.ExtractObjectArray(object, nullptr, &values);
- vector<KuduValue *> kudu_values;
- for (const auto& value : values) {
- kudu_values.emplace_back(ParseValue(type, value));
+ reader.ExtractObjectArray(&object, nullptr, &values);
+ // Using vector of auto-pointers to avoid memory leakage if ParseValue()
+ // returns non-OK status.
+ vector<unique_ptr<KuduValue>> kudu_values;
+ for (const auto* v : values) {
+ unique_ptr<KuduValue> kudu_value;
+ RETURN_NOT_OK(ParseValue(*v, column_type, &kudu_value));
+ kudu_values.emplace_back(std::move(kudu_value));
}
- return table->NewInListPredicate(name, &kudu_values);
+
+ vector<KuduValue*> kudu_values_raw;
+ kudu_values_raw.reserve(kudu_values.size());
+ for (auto& v : kudu_values) {
+ kudu_values_raw.emplace_back(v.release());
+ }
+ out->reset(table->NewInListPredicate(column_name, &kudu_values_raw));
+
+ return Status::OK();
}
Status AddPredicate(const client::sp::shared_ptr<KuduTable>& table,
@@ -322,35 +379,46 @@ Status AddPredicate(const
client::sp::shared_ptr<KuduTable>& table,
return Status::OK();
}
- Schema schema_internal = KuduSchema::ToSchema(table->schema());
- int idx = schema_internal.find_column(column_name);
- if (PREDICT_FALSE(idx == Schema::kColumnNotFound)) {
+ const Schema schema_internal = KuduSchema::ToSchema(table->schema());
+ const int idx = schema_internal.find_column(column_name);
+ if (idx == Schema::kColumnNotFound) {
return Status::NotFound("no such column", column_name);
}
- auto type = table->schema().Column(static_cast<size_t>(idx)).type();
- KuduPredicate* predicate = nullptr;
- PredicateType pt = ParsePredicateType(predicate_type);
+ const auto column_type =
table->schema().Column(static_cast<size_t>(idx)).type();
+ const PredicateType pt = ParsePredicateType(predicate_type);
+ unique_ptr<KuduPredicate> predicate;
switch (pt) {
case PredicateType::Equality:
case PredicateType::Range:
- CHECK(value);
- predicate = NewComparisonPredicate(table, type, predicate_type,
column_name, *value);
+ if (!value.has_value()) {
+ return Status::InvalidArgument("missing value for range/equality
predicate");
+ }
+ RETURN_NOT_OK(NewComparisonPredicate(
+ table, column_name, column_type, predicate_type, *value.value(),
&predicate));
break;
case PredicateType::IsNotNull:
case PredicateType::IsNull:
- CHECK(!value);
- predicate = NewIsNullPredicate(table, column_name, pt);
+ if (value.has_value()) {
+ return Status::InvalidArgument(Substitute(
+ "'$0': unexpected value for NULL/NOT NULL predicate",
+ EasyJson::ToString(*value.value())));
+ }
+ RETURN_NOT_OK(NewIsNullPredicate(table, column_name, pt, &predicate));
break;
case PredicateType::InList: {
- CHECK(value);
- predicate = NewInListPredicate(table, type, column_name, reader, *value);
+ if (!value.has_value()) {
+ return Status::InvalidArgument("missing value for IN (in-list)
predicate");
+ }
+ RETURN_NOT_OK(NewInListPredicate(
+ table, column_name, column_type, *value.value(), reader,
&predicate));
break;
}
default:
- return Status::NotSupported(Substitute("not support predicate_type $0",
predicate_type));
+ return Status::InvalidArgument(
+ Substitute("'$0': unsupported predicate", predicate_type));
}
- CHECK(predicate);
- RETURN_NOT_OK(builder->AddConjunctPredicate(predicate));
+ DCHECK(predicate);
+ RETURN_NOT_OK(builder->AddConjunctPredicate(predicate.release()));
return Status::OK();
}
@@ -366,35 +434,51 @@ Status AddPredicates(const
client::sp::shared_ptr<KuduTable>& table,
RETURN_NOT_OK(reader.ExtractObjectArray(reader.root(),
nullptr,
&predicate_objects));
+ static constexpr const char* const kFmtErrPredicateName =
+ "$0: predicate name must be a string";
vector<unique_ptr<KuduPredicate>> predicates;
- for (int i = 0; i < predicate_objects.size(); ++i) {
+ for (size_t i = 0; i < predicate_objects.size(); ++i) {
+ const auto* obj = predicate_objects[i];
if (i == 0) {
- CHECK(predicate_objects[i]->IsString());
+ if (!obj->IsString()) {
+ return Status::InvalidArgument(Substitute(
+ kFmtErrPredicateName, EasyJson::ToString(*obj)));
+ }
string op;
- ToUpperCase(predicate_objects[i]->GetString(), &op);
+ ToUpperCase(obj->GetString(), &op);
if (op != "AND") {
- return Status::InvalidArgument(Substitute("only 'AND' operator is
supported now"));
+ return Status::InvalidArgument(Substitute(
+ "$0: only 'AND' is supported as predicate operator", op));
}
continue;
}
- CHECK(predicate_objects[i]->IsArray());
+ if (!obj->IsArray()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: expected JSON array for predicates", EasyJson::ToString(*obj)));
+ }
vector<const rapidjson::Value*> elements;
- reader.ExtractObjectArray(predicate_objects[i], nullptr, &elements);
- if (elements.size() == 2 || elements.size() == 3) {
- CHECK(elements[0]->IsString());
- CHECK(elements[1]->IsString());
- RETURN_NOT_OK(AddPredicate(table,
- elements[0]->GetString(),
- elements[1]->GetString(),
- elements.size() == 2 ? nullopt
- : optional<const
rapidjson::Value*>(elements[2]),
- reader,
- builder));
- } else {
+ reader.ExtractObjectArray(obj, nullptr, &elements);
+ if (elements.size() != 2 && elements.size() != 3) {
return Status::InvalidArgument(
- Substitute("invalid predicate elements count $0", elements.size()));
+ Substitute("$0: malformed predicate", EasyJson::ToString(*obj)));
+ }
+ if (const auto* elem = elements[0]; !elem->IsString()) {
+ return Status::InvalidArgument(Substitute(
+ kFmtErrPredicateName, EasyJson::ToString(*elem)));
+ }
+ if (const auto* elem = elements[1]; !elem->IsString()) {
+ return Status::InvalidArgument(Substitute(
+ "$0: column name must be a string", EasyJson::ToString(*elem)));
}
+ RETURN_NOT_OK(AddPredicate(
+ table,
+ elements[0]->GetString(),
+ elements[1]->GetString(),
+ elements.size() == 2 ? nullopt
+ : optional<const rapidjson::Value*>(elements[2]),
+ reader,
+ builder));
}
return Status::OK();