Copilot commented on code in PR #64313:
URL: https://github.com/apache/doris/pull/64313#discussion_r3380192210


##########
cloud/src/recycler/recycler.h:
##########
@@ -57,6 +57,11 @@ class SimpleThreadPool;
 class RecyclerMetricsContext;
 class TabletRecyclerMetricsContext;
 class SegmentRecyclerMetricsContext;
+
+int64_t calculate_tmp_rowset_expired_time(
+        const std::string& instance_id_, const doris::RowsetMetaCloudPB& 
tmp_rowset_meta_pb,
+        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */);

Review Comment:
   Spelling: the new parameter is named `earlest_ts` (missing 'i'), even though 
its inline comment spells "earliest" correctly. Consider renaming it to 
`earliest_ts` to avoid propagating the typo into new API surface.



##########
cloud/src/recycler/checker.cpp:
##########
@@ -1338,6 +1353,116 @@ int InstanceChecker::do_delete_bitmap_inverted_check() {
     return (leaked_delete_bitmaps > 0 || abnormal_delete_bitmaps > 0) ? 1 : 0;
 }
 
+int InstanceChecker::collect_unexpired_job_tmp_rowsets(
+        std::unordered_map<int64_t, std::unordered_set<std::string>>& 
tmp_rowsets) {
+    static constexpr int64_t max_unexpired_tmp_rowsets = 1000;
+    auto begin = meta_rowset_tmp_key({instance_id_, 0, 0});
+    auto end = meta_rowset_tmp_key({instance_id_, INT64_MAX, 0});
+    std::unique_ptr<RangeGetIterator> it;
+    int64_t num_scanned = 0;
+    int64_t num_non_job = 0;
+    int64_t num_skipped_non_job_txns = 0;
+    int64_t num_unexpired = 0;
+    int64_t num_expired = 0;
+    int64_t last_txn_id = -1;
+    int64_t current_time = 
duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
+
+    while (it == nullptr /* may be not init */ || (it->more() && !stopped())) {
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get tmp rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            ++num_scanned;
+
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, 
int>> out;
+            if (decode_key(&k1, &out) != 0 || out.size() < 5) {
+                LOG(WARNING) << "malformed tmp rowset key, key=" << hex(k);
+                return -1;
+            }
+            // 0x01 "meta" ${instance_id} "rowset_tmp" ${txn_id} ${tablet_id} 
-> RowsetMetaCloudPB
+            auto txn_id = std::get<int64_t>(std::get<0>(out[3]));
+            bool is_first_rowset_of_txn = last_txn_id != txn_id;
+            last_txn_id = txn_id;
+
+            doris::RowsetMetaCloudPB rowset;
+            if (!rowset.ParseFromArray(v.data(), v.size())) {
+                LOG(WARNING) << "malformed tmp rowset meta, key=" << hex(k);
+                return -1;
+            }
+            if (!rowset.has_job_id() || rowset.job_id().empty()) {
+                ++num_non_job;
+                if (is_first_rowset_of_txn) {
+                    ++num_skipped_non_job_txns;
+                    if (txn_id == INT64_MAX) {
+                        begin = end;
+                    } else {
+                        begin = meta_rowset_tmp_key({instance_id_, txn_id + 1, 
0});
+                    }
+                    it.reset();
+                    break;
+                }
+                if (!it->has_next()) {
+                    begin = k;
+                    begin.push_back('\x00');
+                }
+                continue;
+            }
+
+            // Must use the same threshold as the recycler so that a delete 
bitmap is never
+            // reported as leaked while its tmp rowset is still alive from the 
recycler's view.
+            // `earlest_ts` is a local sentinel initialized to 0 on purpose: 
it keeps the value
+            // below any real expiration so the helper never updates the 
recycler's
+            // earliest-ts bvar (the checker must not touch the recycler's 
metrics).
+            int64_t earlest_ts = 0;
+            int64_t expiration =
+                    calculate_tmp_rowset_expired_time(instance_id_, rowset, 
&earlest_ts);

Review Comment:
   Spelling: local variable and comment use `earlest_ts` (missing 'i'). Rename 
to `earliest_ts` for clarity (does not affect behavior).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to