github-actions[bot] commented on code in PR #61882:
URL: https://github.com/apache/doris/pull/61882#discussion_r3013311478
##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2148,175 @@ std::pair<MetaServiceCode, std::string>
handle_snapshot_intervals(const std::str
return std::make_pair(MetaServiceCode::OK, "");
}
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction*
txn,
+ const
std::string& instance_id,
+
InstanceInfoPB* instance) {
+ std::string instance_val;
+ TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+ if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={}, err={}",
instance_id, err)};
+ }
+ if (!instance->ParseFromString(instance_val)) {
+ return {MetaServiceCode::PROTOBUF_PARSE_ERR,
+ fmt::format("failed to parse instance proto, instance_id={}",
instance_id)};
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+// Traverses the predecessor chain of the given instance, populating
`predecessors` with the
+// instance info of each predecessor.
+static std::pair<MetaServiceCode, std::string> get_predecessor_instances(
+ Transaction* txn, const InstanceInfoPB& instance,
+ std::vector<InstanceInfoPB>* predecessors) {
+ std::string instance_id = instance.original_instance_id();
+ while (!instance_id.empty() && instance_id != instance.instance_id()) {
+ InstanceInfoPB current_instance;
+ auto [code, msg] = get_instance_info(txn, instance_id,
¤t_instance);
+ if (code != MetaServiceCode::OK) {
Review Comment:
**Bug (potential infinite loop): `get_predecessor_instances` lacks cycle
detection.**
If the predecessor chain contains a cycle due to corrupted metadata (e.g.,
instance A -> B -> A), this loop will never terminate. Compare with
`get_all_snapshots` in `snapshot_manager.cpp` which correctly uses a `visited`
set to detect cycles.
Suggested fix: add a `std::unordered_set<std::string> visited` and check
before each iteration:
```cpp
std::unordered_set<std::string> visited;
while (!instance_id.empty() && instance_id != instance.instance_id()) {
if (visited.count(instance_id) > 0) {
return {MetaServiceCode::INVALID_ARGUMENT,
fmt::format("cycle detected in instance chain,
instance_id={}", instance_id)};
}
visited.insert(instance_id);
// ... rest of loop
}
```
##########
cloud/src/snapshot/snapshot_manager.cpp:
##########
@@ -262,4 +264,98 @@ int
SnapshotManager::compact_snapshot_chains(InstanceChainCompactor* compactor)
return -1;
}
+static std::pair<MetaServiceCode, std::string> get_instance(Transaction* txn,
+ std::string_view
instance_id,
+ InstanceInfoPB*
instance_info) {
+ InstanceKeyInfo instance_key_info {instance_id};
+ std::string key = instance_key(instance_key_info);
+ std::string val;
+ TxnErrorCode err = txn->get(key, &val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ return {MetaServiceCode::INVALID_ARGUMENT,
+ fmt::format("instance not found, instance_id={}",
instance_id)};
+ } else if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={}, err={}",
instance_id, err)};
+ }
+
+ if (!instance_info->ParseFromString(val)) {
+ return {MetaServiceCode::INVALID_ARGUMENT, "failed to parse instance
info"};
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+std::pair<MetaServiceCode, std::string> SnapshotManager::get_all_snapshots(
+ Transaction* txn, std::string_view instance_id, std::string_view
required_snapshot_id,
+ std::vector<std::pair<SnapshotPB, Versionstamp>>* snapshots) {
+ Versionstamp required_snapshot_versionstamp;
+ if (!required_snapshot_id.empty()) {
+ if (!parse_snapshot_versionstamp(required_snapshot_id,
&required_snapshot_versionstamp)) {
+ return {MetaServiceCode::INVALID_ARGUMENT, "invalid snapshot_id
format"};
+ }
+ }
+
+ InstanceInfoPB instance_info;
+ auto [code, error_msg] = get_instance(txn, instance_id, &instance_info);
+ if (code != MetaServiceCode::OK) {
+ return {code, error_msg};
+ }
+ std::string current_instance_id(instance_id);
+ if (instance_info.has_original_instance_id() &&
!instance_info.original_instance_id().empty()) {
+ // the earliest instance_id for rollback
+ current_instance_id = instance_info.original_instance_id();
+ }
+
+ std::unordered_set<std::string> visited;
+ do {
+ MetaReader meta_reader(current_instance_id);
+ if (required_snapshot_id.empty()) {
+ TxnErrorCode err = meta_reader.get_snapshots(txn, snapshots);
+ if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err), "failed to get
snapshots"};
+ }
+ } else {
+ SnapshotPB snapshot_pb;
+ TxnErrorCode err =
+ meta_reader.get_snapshot(txn,
required_snapshot_versionstamp, &snapshot_pb);
+ if (err == TxnErrorCode::TXN_OK) {
+ snapshots->emplace_back(snapshot_pb,
required_snapshot_versionstamp);
+ return {MetaServiceCode::OK, ""};
+ } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ return {cast_as<ErrCategory::READ>(err), "failed to get
snapshot"};
+ }
+ }
+ if (current_instance_id == instance_id) {
+ break;
+ }
+ auto [code, error_msg] = get_instance(txn, current_instance_id,
&instance_info);
+ if (code != MetaServiceCode::OK) {
+ std::string msg = fmt::format("failed to get ancestor instance {}:
{}",
+ current_instance_id, error_msg);
Review Comment:
**Warning: Variable shadowing — inner `auto [code, error_msg]` shadows the
outer one from line 299.**
The outer scope (line 299) declares `auto [code, error_msg]`, and this line
declares another `auto [code, error_msg]` inside the do-while loop body. While
this compiles and works correctly (the inner variables are used for the returns
at lines 340-345 and 348-353), it can cause confusion and compiler warnings
with `-Wshadow`.
Consider renaming the inner variables, e.g.:
```cpp
auto [inner_code, inner_error_msg] = get_instance(txn, current_instance_id,
&instance_info);
```
##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2148,175 @@ std::pair<MetaServiceCode, std::string>
handle_snapshot_intervals(const std::str
return std::make_pair(MetaServiceCode::OK, "");
}
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction*
txn,
+ const
std::string& instance_id,
+
InstanceInfoPB* instance) {
+ std::string instance_val;
Review Comment:
**Code duplication: `get_instance_info` duplicates `get_instance` in
`snapshot_manager.cpp`.**
Both functions do the same thing: fetch an instance by ID from TxnKv and
parse the protobuf. The `get_instance` version in `snapshot_manager.cpp` is
actually better (handles `TXN_KEY_NOT_FOUND` explicitly). Consider factoring
this into a shared utility to avoid divergence.
##########
cloud/src/snapshot/snapshot_manager.cpp:
##########
@@ -262,4 +264,98 @@ int
SnapshotManager::compact_snapshot_chains(InstanceChainCompactor* compactor)
return -1;
}
+static std::pair<MetaServiceCode, std::string> get_instance(Transaction* txn,
+ std::string_view
instance_id,
+ InstanceInfoPB*
instance_info) {
+ InstanceKeyInfo instance_key_info {instance_id};
+ std::string key = instance_key(instance_key_info);
+ std::string val;
+ TxnErrorCode err = txn->get(key, &val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ return {MetaServiceCode::INVALID_ARGUMENT,
+ fmt::format("instance not found, instance_id={}",
instance_id)};
+ } else if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={}, err={}",
instance_id, err)};
+ }
+
+ if (!instance_info->ParseFromString(val)) {
+ return {MetaServiceCode::INVALID_ARGUMENT, "failed to parse instance
info"};
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+std::pair<MetaServiceCode, std::string> SnapshotManager::get_all_snapshots(
+ Transaction* txn, std::string_view instance_id, std::string_view
required_snapshot_id,
+ std::vector<std::pair<SnapshotPB, Versionstamp>>* snapshots) {
+ Versionstamp required_snapshot_versionstamp;
Review Comment:
**Dead code: `get_all_snapshots` has no callers in this PR or elsewhere in
the codebase.**
This is a new static method added to `SnapshotManager` but nothing calls it.
If it is intended for future use, it should ideally be introduced together with
its callers, or at minimum have a unit test to verify its correctness.
Currently, the chain traversal logic is untested.
##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2148,175 @@ std::pair<MetaServiceCode, std::string>
handle_snapshot_intervals(const std::str
return std::make_pair(MetaServiceCode::OK, "");
}
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction*
txn,
+ const
std::string& instance_id,
+
InstanceInfoPB* instance) {
+ std::string instance_val;
+ TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+ if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={}, err={}",
instance_id, err)};
+ }
Review Comment:
**Issue: `get_instance_info` does not distinguish `TXN_KEY_NOT_FOUND` from
other errors.**
When a predecessor instance is missing (perhaps already deleted or never
existed), `cast_as<ErrCategory::READ>(TXN_KEY_NOT_FOUND)` returns
`KV_TXN_GET_ERR`, which is a generic internal error. Compare with the
`get_instance` function added in `snapshot_manager.cpp` (line 274) which
explicitly checks `TXN_KEY_NOT_FOUND` and returns `INVALID_ARGUMENT` with a
clear "instance not found" message.
This matters because `get_predecessor_instances` calls this function — if a
predecessor instance is missing, the user gets a confusing `KV_TXN_GET_ERR`
instead of a clear "instance not found" error.
##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2148,175 @@ std::pair<MetaServiceCode, std::string>
handle_snapshot_intervals(const std::str
return std::make_pair(MetaServiceCode::OK, "");
}
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction*
txn,
+ const
std::string& instance_id,
+
InstanceInfoPB* instance) {
+ std::string instance_val;
+ TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+ if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={}, err={}",
instance_id, err)};
+ }
+ if (!instance->ParseFromString(instance_val)) {
+ return {MetaServiceCode::PROTOBUF_PARSE_ERR,
+ fmt::format("failed to parse instance proto, instance_id={}",
instance_id)};
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+// Traverses the predecessor chain of the given instance, populating
`predecessors` with the
+// instance info of each predecessor.
+static std::pair<MetaServiceCode, std::string> get_predecessor_instances(
+ Transaction* txn, const InstanceInfoPB& instance,
+ std::vector<InstanceInfoPB>* predecessors) {
+ std::string instance_id = instance.original_instance_id();
+ while (!instance_id.empty() && instance_id != instance.instance_id()) {
+ InstanceInfoPB current_instance;
+ auto [code, msg] = get_instance_info(txn, instance_id,
¤t_instance);
+ if (code != MetaServiceCode::OK) {
+ return {code, std::move(msg)};
+ }
+ instance_id = current_instance.successor_instance_id();
+ predecessors->emplace_back(std::move(current_instance));
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> ensure_all_snapshot_recycled(
+ Transaction* txn, const std::string& instance_id,
+ std::optional<std::reference_wrapper<std::unordered_set<std::string>>>
+ exclude_snapshot_set) {
+ MetaReader meta_reader(instance_id);
+ std::vector<std::pair<SnapshotPB, Versionstamp>> snapshots;
+ TxnErrorCode err = meta_reader.get_snapshots(txn, &snapshots);
+ if (err != TxnErrorCode::TXN_OK) {
+ std::string msg = fmt::format("failed to get snapshots, err={}", err);
+ LOG(WARNING) << msg << " instance_id=" << instance_id;
+ return {cast_as<ErrCategory::READ>(err), std::move(msg)};
+ }
+ for (auto& [snapshot, versionstamp] : snapshots) {
+ std::string snapshot_id =
SnapshotManager::serialize_snapshot_id(versionstamp);
+ if (exclude_snapshot_set.has_value() &&
+ exclude_snapshot_set.value().get().count(snapshot_id) > 0) {
+ continue;
+ }
+ if (snapshot.status() != SnapshotStatus::SNAPSHOT_RECYCLED) {
+ std::string msg = fmt::format(
+ "failed to drop instance, instance has snapshots,
snapshot_id={}", snapshot_id);
+ LOG(WARNING) << msg << " instance_id=" << instance_id
+ << " snapshot=" << proto_to_json(snapshot);
+ return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+ }
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> drop_single_instance(const
std::string& instance_id,
+
Transaction* txn,
+
InstanceInfoPB* instance) {
+ auto [code, msg] = ensure_all_snapshot_recycled(txn, instance_id,
std::nullopt);
+ if (code != MetaServiceCode::OK) {
+ return {code, std::move(msg)};
+ }
+
+ instance->set_status(InstanceInfoPB::DELETED);
+
instance->set_mtime(duration_cast<seconds>(system_clock::now().time_since_epoch()).count());
+
+ std::string serialized = instance->SerializeAsString();
+ if (serialized.empty()) {
+ std::string msg = "failed to serialize";
+ LOG(ERROR) << msg;
+ return {MetaServiceCode::PROTOBUF_SERIALIZE_ERR, std::move(msg)};
+ }
+ LOG(INFO) << "drop instance_id=" << instance_id << " json=" <<
proto_to_json(*instance);
+
+ if (instance->has_source_instance_id() &&
instance->has_source_snapshot_id() &&
+ !instance->source_instance_id().empty() &&
!instance->source_snapshot_id().empty()) {
+ Versionstamp snapshot_versionstamp;
+ if
(!SnapshotManager::parse_snapshot_versionstamp(instance->source_snapshot_id(),
+
&snapshot_versionstamp)) {
+ std::string msg = "failed to parse snapshot_id to versionstamp,
snapshot_id=" +
+ instance->source_snapshot_id();
+ LOG(WARNING) << msg;
+ return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+ }
+ std::string snapshot_ref_key = versioned::snapshot_reference_key(
+ {instance->source_instance_id(), snapshot_versionstamp,
instance_id});
+ txn->remove(snapshot_ref_key);
+ }
+ return {MetaServiceCode::OK, std::move(serialized)};
+}
+
+// Handles the instance-chain drop: all instances from the original up to and
including the tail
+// are marked DELETED atomically within the same transaction.
+static std::pair<MetaServiceCode, std::string> drop_instance_chain(
+ const std::string& tail_instance_id, Transaction* txn, InstanceInfoPB*
tail_instance) {
+ std::vector<InstanceInfoPB> predecessors;
+ if (auto [code, err] = get_predecessor_instances(txn, *tail_instance,
&predecessors);
+ code != MetaServiceCode::OK) {
+ LOG(WARNING) << "drop instance chain: " << err;
+ return {code, std::move(err)};
+ }
+
+ std::unordered_set<std::string> exclude_snapshot_set;
+ if (auto [code, err] = ensure_all_snapshot_recycled(txn, tail_instance_id,
std::nullopt);
+ code != MetaServiceCode::OK) {
+ return {code, std::move(err)};
+ }
+ exclude_snapshot_set.insert(tail_instance->source_snapshot_id());
+
+ auto remove_snapshot_reference_key = [&](const InstanceInfoPB& instance) {
+ Versionstamp vs;
Review Comment:
**Potential issue: `tail_instance->source_snapshot_id()` may be empty,
inserting empty string into `exclude_snapshot_set`.**
If the tail instance does not have a `source_snapshot_id` (e.g., it's the
original instance that was subsequently given a successor),
`source_snapshot_id()` returns an empty string, which is then inserted into
`exclude_snapshot_set`. This could cause `ensure_all_snapshot_recycled` to skip
snapshots with an empty serialized ID. Consider guarding this with:
```cpp
if (tail_instance->has_source_snapshot_id() &&
!tail_instance->source_snapshot_id().empty()) {
exclude_snapshot_set.insert(tail_instance->source_snapshot_id());
}
```
Same pattern as done for predecessors at line 133.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]